Newer
Older
minecraft-ui / vendor / golang.org / x / text / encoding / ianaindex / ianaindex.go
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go

// Package ianaindex maps names to Encodings as specified by the IANA registry.
// This includes both the MIME and IANA names.
//
// See http://www.iana.org/assignments/character-sets/character-sets.xhtml for
// more details.
package ianaindex

import (
	"errors"
	"sort"
	"strings"

	"golang.org/x/text/encoding"
	"golang.org/x/text/encoding/charmap"
	"golang.org/x/text/encoding/internal/identifier"
	"golang.org/x/text/encoding/japanese"
	"golang.org/x/text/encoding/korean"
	"golang.org/x/text/encoding/simplifiedchinese"
	"golang.org/x/text/encoding/traditionalchinese"
	"golang.org/x/text/encoding/unicode"
)

// TODO: remove the "Status... incomplete" in the package doc comment.
// TODO: allow users to specify their own aliases?
// TODO: allow users to specify their own indexes?
// TODO: allow canonicalizing names

// NOTE: only use these top-level variables if we can get the linker to drop
// the indexes when they are not used. Make them a function or perhaps only
// support MIME otherwise.

var (
	// MIME is the index that reports MIME names for encodings.
	MIME *Index = mime

	// IANA is the index that reports IANA names for encodings; it supports
	// all names and aliases and uses the IANA name as the canonical
	// identifier.
	IANA *Index = iana

	// MIB is the index that reports the MIB display name of an Encoding.
	MIB *Index = mib

	// The three indexes share one MIB table, one alias map and one encoding
	// table; the only thing that differs is the function producing names.
	mime = &Index{names: mimeName, toMIB: ianaToMIB, alias: ianaAliases, enc: encodings[:]}
	iana = &Index{names: ianaName, toMIB: ianaToMIB, alias: ianaAliases, enc: encodings[:]}
	mib  = &Index{names: mibName, toMIB: ianaToMIB, alias: ianaAliases, enc: encodings[:]}
)

// Index maps names registered by IANA to Encodings.
// Currently different Indexes only differ in the names they return for
// encodings. In the future they may also differ in supported aliases.
//
// The fields are initialized positionally by the package-level indexes, so
// their order must not change.
type Index struct {
	// names returns the name reported by Name for a position in toMIB.
	names func(i int) string
	toMIB []identifier.MIB // Sorted slice of supported MIBs
	// alias maps a name accepted by Encoding to a position in enc.
	alias map[string]int
	// enc holds the Encodings returned by Encoding, indexed by alias values.
	enc   []encoding.Encoding
}

// Sentinel errors returned by the Index methods.
var (
	// errInvalidName is returned by Encoding when the name matches no alias.
	errInvalidName = errors.New("ianaindex: invalid encoding name")

	// errUnknown is returned by Name when the Encoding cannot be identified.
	errUnknown = errors.New("ianaindex: unknown Encoding")

	// errUnsupported is returned by Name when the Encoding is identified but
	// its MIB is not part of the index.
	errUnsupported = errors.New("ianaindex: unsupported Encoding")
)

// Encoding returns the Encoding registered under name. Surrounding white
// space is ignored. The name is looked up verbatim first and, failing that,
// in its lowercased form. If neither spelling is a known alias, it returns
// errInvalidName.
//
// NOTE(review): the result is whatever the encoding table holds for the
// alias; a slot that was never populated would yield a nil Encoding together
// with a nil error. Confirm whether callers rely on that.
func (x *Index) Encoding(name string) (encoding.Encoding, error) {
	trimmed := strings.TrimSpace(name)

	// Try the exact spelling first: lowercasing may allocate.
	if idx, found := x.alias[trimmed]; found {
		return x.enc[idx], nil
	}
	if idx, found := x.alias[strings.ToLower(trimmed)]; found {
		return x.enc[idx], nil
	}
	return nil, errInvalidName
}

// Name reports the canonical name of the given Encoding as defined by this
// index.
//
// It returns errUnknown if e does not implement identifier.Interface or
// reports a zero MIB, and errUnsupported if the MIB is not present in the
// index's MIB table.
func (x *Index) Name(e encoding.Encoding) (string, error) {
	ident, isIdent := e.(identifier.Interface)
	if !isIdent {
		return "", errUnknown
	}

	// Only the first result of ID is needed; the second is discarded.
	id, _ := ident.ID()
	if id == 0 {
		return "", errUnknown
	}

	if pos := findMIB(x.toMIB, id); pos != -1 {
		return x.names(pos), nil
	}
	return "", errUnsupported
}

// TODO: the coverage of this index is rather spotty. Allowing users to set
// encodings would allow:
// - users to increase coverage
// - allow a partially loaded set of encodings in case the user doesn't need
//   them all.
// - write an OS-specific wrapper for supported encodings and set them.
// The exact definition of Set depends a bit on if and how we want to let users
// write their own Encoding implementations. Also, it is not possible yet to
// only partially load the encodings without doing some refactoring. Until this
// is solved, we might as well not support Set.
// // Set sets the e to be used for the encoding scheme identified by name. Only
// // canonical names may be used. An empty name assigns e to its internally
// // associated encoding scheme.
// func (x *Index) Set(name string, e encoding.Encoding) error {
// 	panic("TODO: implement")
// }

// findMIB returns the position of mib in x, or -1 if mib is not present.
// x must be sorted in ascending order, as required by the binary search.
func findMIB(x []identifier.MIB, mib identifier.MIB) int {
	// sort.Search yields the first position whose element is >= mib, or
	// len(x) when there is no such element.
	pos := sort.Search(len(x), func(k int) bool { return mib <= x[k] })
	if pos >= len(x) || x[pos] != mib {
		return -1
	}
	return pos
}

// maxMIMENameLen is the largest first-byte value that mimeName and ianaName
// treat as an offset marker rather than as the first character of a name.
// '0' - 1 is 47, so the marker range covers every byte value below the ASCII
// digit '0'.
const maxMIMENameLen = '0' - 1 // officially 40, but we leave some buffer.

// mimeName returns the MIME name stored in entry x of ianaNames.
//
// If the first byte of the entry is at most maxMIMENameLen, it is an offset:
// the MIME name occupies the bytes from index 1 up to that offset. Otherwise
// the whole entry is returned.
func mimeName(x int) string {
	packed := ianaNames[x]
	// See gen.go for a description of the encoding.
	if end := packed[0]; end <= maxMIMENameLen {
		return packed[1:end]
	}
	return packed
}

// ianaName returns the IANA name stored in entry x of ianaNames.
//
// If the first byte of the entry is at most maxMIMENameLen, it is an offset:
// the IANA name is everything from that offset to the end of the entry.
// Otherwise the whole entry is returned.
func ianaName(x int) string {
	packed := ianaNames[x]
	// See gen.go for a description of the encoding.
	if start := packed[0]; start <= maxMIMENameLen {
		return packed[start:]
	}
	return packed
}

// mibName returns entry x of the mibNames table. It is the name function
// used by the MIB index.
func mibName(x int) string {
	return mibNames[x]
}

// encodings is the table of Encoding implementations shared by all indexes.
// Each element is keyed by one of the encNNN index constants. Any index in
// [0, numIANA) that is not listed here keeps the zero value, i.e. a nil
// Encoding.
var encodings = [numIANA]encoding.Encoding{
	enc106:  unicode.UTF8,
	enc1015: unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
	enc1013: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
	enc1014: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
	enc2028: charmap.CodePage037,
	enc2011: charmap.CodePage437,
	enc2009: charmap.CodePage850,
	enc2010: charmap.CodePage852,
	enc2046: charmap.CodePage855,
	enc2089: charmap.CodePage858,
	enc2048: charmap.CodePage860,
	enc2013: charmap.CodePage862,
	enc2050: charmap.CodePage863,
	enc2052: charmap.CodePage865,
	enc2086: charmap.CodePage866,
	enc2102: charmap.CodePage1047,
	enc2091: charmap.CodePage1140,
	enc4:    charmap.ISO8859_1,
	enc5:    charmap.ISO8859_2,
	enc6:    charmap.ISO8859_3,
	enc7:    charmap.ISO8859_4,
	enc8:    charmap.ISO8859_5,
	enc9:    charmap.ISO8859_6,
	enc81:   charmap.ISO8859_6E,
	enc82:   charmap.ISO8859_6I,
	enc10:   charmap.ISO8859_7,
	enc11:   charmap.ISO8859_8,
	enc84:   charmap.ISO8859_8E,
	enc85:   charmap.ISO8859_8I,
	enc12:   charmap.ISO8859_9,
	enc13:   charmap.ISO8859_10,
	enc109:  charmap.ISO8859_13,
	enc110:  charmap.ISO8859_14,
	enc111:  charmap.ISO8859_15,
	enc112:  charmap.ISO8859_16,
	enc2084: charmap.KOI8R,
	enc2088: charmap.KOI8U,
	enc2027: charmap.Macintosh,
	enc2109: charmap.Windows874,
	enc2250: charmap.Windows1250,
	enc2251: charmap.Windows1251,
	enc2252: charmap.Windows1252,
	enc2253: charmap.Windows1253,
	enc2254: charmap.Windows1254,
	enc2255: charmap.Windows1255,
	enc2256: charmap.Windows1256,
	enc2257: charmap.Windows1257,
	enc2258: charmap.Windows1258,
	enc18:   japanese.EUCJP,
	enc39:   japanese.ISO2022JP,
	enc17:   japanese.ShiftJIS,
	enc38:   korean.EUCKR,
	enc114:  simplifiedchinese.GB18030,
	enc113:  simplifiedchinese.GBK,
	enc2085: simplifiedchinese.HZGB2312,
	enc2026: traditionalchinese.Big5,
}