Newer
Older
pokemon-go-trade / vendor / golang.org / x / text / encoding / htmlindex / gen.go
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build ignore

package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"log"
	"strings"

	"golang.org/x/text/internal/gen"
)

type group struct {
	Encodings []struct {
		Labels []string
		Name   string
	}
}

func main() {
	gen.Init()

	r := gen.Open("https://encoding.spec.whatwg.org", "whatwg", "encodings.json")
	var groups []group
	if err := json.NewDecoder(r).Decode(&groups); err != nil {
		log.Fatalf("Error reading encodings.json: %v", err)
	}

	w := &bytes.Buffer{}
	fmt.Fprintln(w, "type htmlEncoding byte")
	fmt.Fprintln(w, "const (")
	for i, g := range groups {
		for _, e := range g.Encodings {
			key := strings.ToLower(e.Name)
			name := consts[key]
			if name == "" {
				log.Fatalf("No const defined for %s.", key)
			}
			if i == 0 {
				fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
			} else {
				fmt.Fprintf(w, "%s\n", name)
			}
		}
	}
	fmt.Fprintln(w, "numEncodings")
	fmt.Fprint(w, ")\n\n")

	fmt.Fprintln(w, "var canonical = [numEncodings]string{")
	for _, g := range groups {
		for _, e := range g.Encodings {
			fmt.Fprintf(w, "%q,\n", strings.ToLower(e.Name))
		}
	}
	fmt.Fprint(w, "}\n\n")

	fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
	for _, g := range groups {
		for _, e := range g.Encodings {
			for _, l := range e.Labels {
				key := strings.ToLower(e.Name)
				name := consts[key]
				fmt.Fprintf(w, "%q: %s,\n", l, name)
			}
		}
	}
	fmt.Fprint(w, "}\n\n")

	var tags []string
	fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
	for _, loc := range locales {
		tags = append(tags, loc.tag)
		fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
	}
	fmt.Fprint(w, "}\n\n")

	fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))

	gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
}

// consts maps canonical encoding name to internal constant.
var consts = map[string]string{
	"utf-8":          "utf8",
	"ibm866":         "ibm866",
	"iso-8859-2":     "iso8859_2",
	"iso-8859-3":     "iso8859_3",
	"iso-8859-4":     "iso8859_4",
	"iso-8859-5":     "iso8859_5",
	"iso-8859-6":     "iso8859_6",
	"iso-8859-7":     "iso8859_7",
	"iso-8859-8":     "iso8859_8",
	"iso-8859-8-i":   "iso8859_8I",
	"iso-8859-10":    "iso8859_10",
	"iso-8859-13":    "iso8859_13",
	"iso-8859-14":    "iso8859_14",
	"iso-8859-15":    "iso8859_15",
	"iso-8859-16":    "iso8859_16",
	"koi8-r":         "koi8r",
	"koi8-u":         "koi8u",
	"macintosh":      "macintosh",
	"windows-874":    "windows874",
	"windows-1250":   "windows1250",
	"windows-1251":   "windows1251",
	"windows-1252":   "windows1252",
	"windows-1253":   "windows1253",
	"windows-1254":   "windows1254",
	"windows-1255":   "windows1255",
	"windows-1256":   "windows1256",
	"windows-1257":   "windows1257",
	"windows-1258":   "windows1258",
	"x-mac-cyrillic": "macintoshCyrillic",
	"gbk":            "gbk",
	"gb18030":        "gb18030",
	// "hz-gb-2312":     "hzgb2312", // Was removed from WhatWG
	"big5":           "big5",
	"euc-jp":         "eucjp",
	"iso-2022-jp":    "iso2022jp",
	"shift_jis":      "shiftJIS",
	"euc-kr":         "euckr",
	"replacement":    "replacement",
	"utf-16be":       "utf16be",
	"utf-16le":       "utf16le",
	"x-user-defined": "xUserDefined",
}

// locales is taken from
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
var locales = []struct{ tag, name string }{
	// The default value. Explicitly state latin to benefit from the exact
	// script option, while still making 1252 the default encoding for languages
	// written in Latin script.
	{"und_Latn", "windows-1252"},
	{"ar", "windows-1256"},
	{"ba", "windows-1251"},
	{"be", "windows-1251"},
	{"bg", "windows-1251"},
	{"cs", "windows-1250"},
	{"el", "iso-8859-7"},
	{"et", "windows-1257"},
	{"fa", "windows-1256"},
	{"he", "windows-1255"},
	{"hr", "windows-1250"},
	{"hu", "iso-8859-2"},
	{"ja", "shift_jis"},
	{"kk", "windows-1251"},
	{"ko", "euc-kr"},
	{"ku", "windows-1254"},
	{"ky", "windows-1251"},
	{"lt", "windows-1257"},
	{"lv", "windows-1257"},
	{"mk", "windows-1251"},
	{"pl", "iso-8859-2"},
	{"ru", "windows-1251"},
	{"sah", "windows-1251"},
	{"sk", "windows-1250"},
	{"sl", "iso-8859-2"},
	{"sr", "windows-1251"},
	{"tg", "windows-1251"},
	{"th", "windows-874"},
	{"tr", "windows-1254"},
	{"tt", "windows-1251"},
	{"uk", "windows-1251"},
	{"vi", "windows-1258"},
	{"zh-hans", "gb18030"},
	{"zh-hant", "big5"},
}