Newer
Older
pokemon-go-trade / vendor / golang.org / x / text / collate / build / trie.go
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// The trie in this file is used to associate the first full character
// in a UTF-8 string to a collation element.
// All but the last byte in a UTF-8 byte sequence are
// used to look up offsets in the index table to be used for the next byte.
// The last byte is used to index into a table of collation elements.
// This file contains the code for the generation of the trie.

package build

import (
	"fmt"
	"hash/fnv"
	"io"
	"reflect"
)

const (
	blockSize   = 64
	blockOffset = 2 // Subtract 2 blocks to compensate for the 0x80 added to continuation bytes.
)

type trieHandle struct {
	lookupStart uint16 // offset in table for first byte
	valueStart  uint16 // offset in table for first byte
}

type trie struct {
	index  []uint16
	values []uint32
}

// trieNode is the intermediate trie structure used for generating a trie.
type trieNode struct {
	index    []*trieNode
	value    []uint32
	b        byte
	refValue uint16
	refIndex uint16
}

func newNode() *trieNode {
	return &trieNode{
		index: make([]*trieNode, 64),
		value: make([]uint32, 128), // root node size is 128 instead of 64
	}
}

func (n *trieNode) isInternal() bool {
	return n.value != nil
}

func (n *trieNode) insert(r rune, value uint32) {
	const maskx = 0x3F // mask out two most-significant bits
	str := string(r)
	if len(str) == 1 {
		n.value[str[0]] = value
		return
	}
	for i := 0; i < len(str)-1; i++ {
		b := str[i] & maskx
		if n.index == nil {
			n.index = make([]*trieNode, blockSize)
		}
		nn := n.index[b]
		if nn == nil {
			nn = &trieNode{}
			nn.b = b
			n.index[b] = nn
		}
		n = nn
	}
	if n.value == nil {
		n.value = make([]uint32, blockSize)
	}
	b := str[len(str)-1] & maskx
	n.value[b] = value
}

type trieBuilder struct {
	t *trie

	roots []*trieHandle

	lookupBlocks []*trieNode
	valueBlocks  []*trieNode

	lookupBlockIdx map[uint32]*trieNode
	valueBlockIdx  map[uint32]*trieNode
}

func newTrieBuilder() *trieBuilder {
	index := &trieBuilder{}
	index.lookupBlocks = make([]*trieNode, 0)
	index.valueBlocks = make([]*trieNode, 0)
	index.lookupBlockIdx = make(map[uint32]*trieNode)
	index.valueBlockIdx = make(map[uint32]*trieNode)
	// The third nil is the default null block.  The other two blocks
	// are used to guarantee an offset of at least 3 for each block.
	index.lookupBlocks = append(index.lookupBlocks, nil, nil, nil)
	index.t = &trie{}
	return index
}

func (b *trieBuilder) computeOffsets(n *trieNode) *trieNode {
	hasher := fnv.New32()
	if n.index != nil {
		for i, nn := range n.index {
			var vi, vv uint16
			if nn != nil {
				nn = b.computeOffsets(nn)
				n.index[i] = nn
				vi = nn.refIndex
				vv = nn.refValue
			}
			hasher.Write([]byte{byte(vi >> 8), byte(vi)})
			hasher.Write([]byte{byte(vv >> 8), byte(vv)})
		}
		h := hasher.Sum32()
		nn, ok := b.lookupBlockIdx[h]
		if !ok {
			n.refIndex = uint16(len(b.lookupBlocks)) - blockOffset
			b.lookupBlocks = append(b.lookupBlocks, n)
			b.lookupBlockIdx[h] = n
		} else {
			n = nn
		}
	} else {
		for _, v := range n.value {
			hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
		}
		h := hasher.Sum32()
		nn, ok := b.valueBlockIdx[h]
		if !ok {
			n.refValue = uint16(len(b.valueBlocks)) - blockOffset
			n.refIndex = n.refValue
			b.valueBlocks = append(b.valueBlocks, n)
			b.valueBlockIdx[h] = n
		} else {
			n = nn
		}
	}
	return n
}

func (b *trieBuilder) addStartValueBlock(n *trieNode) uint16 {
	hasher := fnv.New32()
	for _, v := range n.value[:2*blockSize] {
		hasher.Write([]byte{byte(v >> 24), byte(v >> 16), byte(v >> 8), byte(v)})
	}
	h := hasher.Sum32()
	nn, ok := b.valueBlockIdx[h]
	if !ok {
		n.refValue = uint16(len(b.valueBlocks))
		n.refIndex = n.refValue
		b.valueBlocks = append(b.valueBlocks, n)
		// Add a dummy block to accommodate the double block size.
		b.valueBlocks = append(b.valueBlocks, nil)
		b.valueBlockIdx[h] = n
	} else {
		n = nn
	}
	return n.refValue
}

func genValueBlock(t *trie, n *trieNode) {
	if n != nil {
		for _, v := range n.value {
			t.values = append(t.values, v)
		}
	}
}

func genLookupBlock(t *trie, n *trieNode) {
	for _, nn := range n.index {
		v := uint16(0)
		if nn != nil {
			if n.index != nil {
				v = nn.refIndex
			} else {
				v = nn.refValue
			}
		}
		t.index = append(t.index, v)
	}
}

func (b *trieBuilder) addTrie(n *trieNode) *trieHandle {
	h := &trieHandle{}
	b.roots = append(b.roots, h)
	h.valueStart = b.addStartValueBlock(n)
	if len(b.roots) == 1 {
		// We insert a null block after the first start value block.
		// This ensures that continuation bytes UTF-8 sequences of length
		// greater than 2 will automatically hit a null block if there
		// was an undefined entry.
		b.valueBlocks = append(b.valueBlocks, nil)
	}
	n = b.computeOffsets(n)
	// Offset by one extra block as the first byte starts at 0xC0 instead of 0x80.
	h.lookupStart = n.refIndex - 1
	return h
}

// generate generates and returns the trie for n.
func (b *trieBuilder) generate() (t *trie, err error) {
	t = b.t
	if len(b.valueBlocks) >= 1<<16 {
		return nil, fmt.Errorf("maximum number of value blocks exceeded (%d > %d)", len(b.valueBlocks), 1<<16)
	}
	if len(b.lookupBlocks) >= 1<<16 {
		return nil, fmt.Errorf("maximum number of lookup blocks exceeded (%d > %d)", len(b.lookupBlocks), 1<<16)
	}
	genValueBlock(t, b.valueBlocks[0])
	genValueBlock(t, &trieNode{value: make([]uint32, 64)})
	for i := 2; i < len(b.valueBlocks); i++ {
		genValueBlock(t, b.valueBlocks[i])
	}
	n := &trieNode{index: make([]*trieNode, 64)}
	genLookupBlock(t, n)
	genLookupBlock(t, n)
	genLookupBlock(t, n)
	for i := 3; i < len(b.lookupBlocks); i++ {
		genLookupBlock(t, b.lookupBlocks[i])
	}
	return b.t, nil
}

func (t *trie) printArrays(w io.Writer, name string) (n, size int, err error) {
	p := func(f string, a ...interface{}) {
		nn, e := fmt.Fprintf(w, f, a...)
		n += nn
		if err == nil {
			err = e
		}
	}
	nv := len(t.values)
	p("// %sValues: %d entries, %d bytes\n", name, nv, nv*4)
	p("// Block 2 is the null block.\n")
	p("var %sValues = [%d]uint32 {", name, nv)
	var printnewline bool
	for i, v := range t.values {
		if i%blockSize == 0 {
			p("\n\t// Block %#x, offset %#x", i/blockSize, i)
		}
		if i%4 == 0 {
			printnewline = true
		}
		if v != 0 {
			if printnewline {
				p("\n\t")
				printnewline = false
			}
			p("%#04x:%#08x, ", i, v)
		}
	}
	p("\n}\n\n")
	ni := len(t.index)
	p("// %sLookup: %d entries, %d bytes\n", name, ni, ni*2)
	p("// Block 0 is the null block.\n")
	p("var %sLookup = [%d]uint16 {", name, ni)
	printnewline = false
	for i, v := range t.index {
		if i%blockSize == 0 {
			p("\n\t// Block %#x, offset %#x", i/blockSize, i)
		}
		if i%8 == 0 {
			printnewline = true
		}
		if v != 0 {
			if printnewline {
				p("\n\t")
				printnewline = false
			}
			p("%#03x:%#02x, ", i, v)
		}
	}
	p("\n}\n\n")
	return n, nv*4 + ni*2, err
}

func (t *trie) printStruct(w io.Writer, handle *trieHandle, name string) (n, sz int, err error) {
	const msg = "trie{ %sLookup[%d:], %sValues[%d:], %sLookup[:], %sValues[:]}"
	n, err = fmt.Fprintf(w, msg, name, handle.lookupStart*blockSize, name, handle.valueStart*blockSize, name, name)
	sz += int(reflect.TypeOf(trie{}).Size())
	return
}