Newer
Older
minecraft-ui / vendor / golang.org / x / text / collate / build / builder_test.go
// Copyright 2012 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package build

import "testing"

// cjk returns the two implicit collation elements that represent the CJK
// rune r.
func cjk(r rune) []rawCE {
	// A CJK character C is represented in the DUCET as
	//   [.AAAA.0020.0002.C][.BBBB.0000.0000.C]
	// Here AAAA is computed as base + (C >> 15) and BBBB as the low 15 bits
	// of C with bit 15 set. Any base value will work for the test, so the
	// common value of FB40 is used.
	const base = 0xFB40
	cp := int(r)
	high := base + int(r>>15)
	low := int(r&0x7FFF) | 0x8000
	return []rawCE{
		{w: []int{high, defaultSecondary, defaultTertiary, cp}},
		{w: []int{low, 0, 0, cp}},
	}
}

// pCE returns a one-element slice holding a collation element with the
// weights {p, defaultSecondary, defaultTertiary, 0} and a ccc of 0.
func pCE(p int) []rawCE {
	weights := []int{p, defaultSecondary, defaultTertiary, 0}
	return mkCE(weights, 0)
}

// pqCE returns a one-element slice holding a collation element with the
// weights {p, defaultSecondary, defaultTertiary, q} and a ccc of 0.
func pqCE(p, q int) []rawCE {
	weights := []int{p, defaultSecondary, defaultTertiary, q}
	return mkCE(weights, 0)
}

// ptCE returns a one-element slice holding a collation element with the
// weights {p, defaultSecondary, t, 0} and a ccc of 0.
func ptCE(p, t int) []rawCE {
	weights := []int{p, defaultSecondary, t, 0}
	return mkCE(weights, 0)
}

// ptcCE returns a one-element slice holding a collation element with the
// weights {p, defaultSecondary, t, 0} and the given ccc.
func ptcCE(p, t int, ccc uint8) []rawCE {
	weights := []int{p, defaultSecondary, t, 0}
	return mkCE(weights, ccc)
}

// sCE returns a one-element slice holding a collation element with the
// weights {0, s, defaultTertiary, 0} and a ccc of 0.
func sCE(s int) []rawCE {
	weights := []int{0, s, defaultTertiary, 0}
	return mkCE(weights, 0)
}

// stCE returns a one-element slice holding a collation element with the
// weights {0, s, t, 0} and a ccc of 0.
func stCE(s, t int) []rawCE {
	weights := []int{0, s, t, 0}
	return mkCE(weights, 0)
}

// scCE returns a one-element slice holding a collation element with the
// weights {0, s, defaultTertiary, 0} and the given ccc.
func scCE(s int, ccc uint8) []rawCE {
	weights := []int{0, s, defaultTertiary, 0}
	return mkCE(weights, ccc)
}

// mkCE wraps the weights w and the value ccc in a single rawCE and returns it
// as a one-element slice. The slice w is stored as is, not copied.
func mkCE(w []int, ccc uint8) []rawCE {
	ce := rawCE{w: w, ccc: ccc}
	return []rawCE{ce}
}

// ducetElem is used to define test data that is used to generate a table.
// str is the string an entry is added for and ces holds its collation
// elements; newBuilder passes the runes of str and the weights of ces to
// Builder.Add.
type ducetElem struct {
	str string
	ces []rawCE
}

// newBuilder returns a Builder populated with the entries of ducet.
//
// For every element, the runes of its string and the weights of its collation
// elements are passed to Builder.Add; only the w field of each rawCE is
// forwarded, the ccc field is not. A failing Add is reported through t and
// the remaining elements are still added. Before returning, the builder is
// given an empty table and its root is sorted.
func newBuilder(t *testing.T, ducet []ducetElem) *Builder {
	b := NewBuilder()
	for _, e := range ducet {
		// The slice is pre-sized but deliberately non-nil, so Add receives
		// an empty slice rather than nil when e.ces has no elements.
		ces := make([][]int, 0, len(e.ces))
		for _, ce := range e.ces {
			ces = append(ces, ce.w)
		}
		if err := b.Add([]rune(e.str), ces, nil); err != nil {
			// Report the error as a value, not as a format string, so that
			// any '%' in its text is printed verbatim.
			t.Error(err)
		}
	}
	b.t = &table{}
	b.root.sort()
	return b
}

// convertTest describes a single case for TestConvertLarge. in holds the
// elements handed to convertLargeWeights and out the elements the conversion
// is expected to return. err reports whether the conversion is expected to
// fail; when it is set, out is not compared.
type convertTest struct {
	in, out []rawCE
	err     bool
}

// convLargeTests pairs inputs for convertLargeWeights with the elements the
// conversion is expected to return, or with an expected error.
var convLargeTests = []convertTest{
	{in: pCE(0xFB39), out: pCE(0xFB39), err: false},
	{in: cjk(0x2F9B2), out: pqCE(0x3F9B2, 0x2F9B2), err: false},
	{in: pCE(0xFB40), out: pCE(0), err: true},
	{in: append(pCE(0xFB40), pCE(0)[0]), out: pCE(0), err: true},
	{in: pCE(0xFFFE), out: pCE(illegalOffset), err: false},
	{in: pCE(0xFFFF), out: pCE(illegalOffset + 1), err: false},
}

// TestConvertLarge runs convertLargeWeights over every case in
// convLargeTests and checks either that an error is returned or that the
// converted elements equal the expected ones.
func TestConvertLarge(t *testing.T) {
	for idx, tc := range convLargeTests {
		ent := new(entry)
		for _, raw := range tc.in {
			ent.elems = append(ent.elems, makeRawCE(raw.w, raw.ccc))
		}
		got, err := convertLargeWeights(ent.elems)
		if tc.err {
			// For failing cases only the presence of an error is checked.
			if err == nil {
				t.Errorf("%d: expected error; none found", idx)
			}
			continue
		}
		if err != nil {
			t.Errorf("%d: unexpected error: %v", idx, err)
		}
		if equalCEArrays(got, tc.out) {
			continue
		}
		t.Errorf("%d: conversion was %x; want %x", idx, got, tc.out)
	}
}

// simplifyTest is the collation element table for the simplify tests.
// TestGenColElems builds its table from the first five entries only, so the
// order of the leading entries must be kept.
var simplifyTest = []ducetElem{
	// grave
	{str: "\u0300", ces: sCE(30)},
	// caron
	{str: "\u030C", ces: sCE(40)},
	{str: "A", ces: ptCE(100, 8)},
	{str: "D", ces: ptCE(104, 8)},
	{str: "E", ces: ptCE(105, 8)},
	{str: "I", ces: ptCE(110, 8)},
	{str: "z", ces: ptCE(130, 8)},
	{str: "\u05F2", ces: append(ptCE(200, 4), ptCE(200, 4)[0])},
	{str: "\u05B7", ces: sCE(80)},
	// A with grave, can be removed
	{str: "\u00C0", ces: append(ptCE(100, 8), sCE(30)...)},
	// E with grave
	{str: "\u00C8", ces: append(ptCE(105, 8), sCE(30)...)},
	// eliminated by NFD
	{str: "\uFB1F", ces: append(ptCE(200, 4), ptCE(200, 4)[0], sCE(80)[0])},
	// block previous from simplifying
	{str: "\u00C8\u0302", ces: ptCE(106, 8)},
	// eliminated by NFKD
	{str: "\u01C5", ces: append(ptCE(104, 9), ptCE(130, 4)[0], stCE(40, maxTertiary)[0])},
	// no removal: tertiary value of third element is not maxTertiary
	{str: "\u2162", ces: append(ptCE(110, 9), ptCE(110, 4)[0], ptCE(110, 8)[0])},
}

// genColTests lists input strings for genColElems together with the
// collation elements TestGenColElems expects for them.
var genColTests = []ducetElem{
	{str: "\uFA70", ces: pqCE(0x1FA70, 0xFA70)},
	{str: "A\u0300", ces: append(ptCE(100, 8), sCE(30)...)},
	{str: "A\u0300\uFA70", ces: append(ptCE(100, 8), sCE(30)[0], pqCE(0x1FA70, 0xFA70)[0])},
	{str: "A\u0300A\u0300", ces: append(ptCE(100, 8), sCE(30)[0], ptCE(100, 8)[0], sCE(30)[0])},
}

// TestGenColElems builds a table from the first five entries of simplifyTest
// and checks that genColElems returns the expected elements for every string
// in genColTests.
func TestGenColElems(t *testing.T) {
	b := newBuilder(t, simplifyTest[:5])

	for idx := range genColTests {
		tc := genColTests[idx]
		got := b.root.genColElems(tc.str)
		if equalCEArrays(tc.ces, got) {
			continue
		}
		t.Errorf("%d: result %X; want %X", idx, got, tc.ces)
	}
}

// strArray is a list of strings that supports a membership test.
type strArray []string

// contains reports whether s is equal to one of the elements of sa.
func (sa strArray) contains(s string) bool {
	for i := 0; i < len(sa); i++ {
		if sa[i] != s {
			continue
		}
		return true
	}
	return false
}

var (
	// simplifyRemoved lists the simplifyTest strings that TestSimplify does
	// not look up after simplify has run.
	simplifyRemoved = strArray{"\u00C0", "\uFB1F"}

	// simplifyMarked lists the strings whose entries TestSimplify expects to
	// have decompose set after simplify has run.
	simplifyMarked = strArray{"\u01C5"}
)

// TestSimplify runs simplify on a table built from simplifyTest and checks
// two things: every entry not listed in simplifyRemoved is still found with
// its original elements, and exactly the entries listed in simplifyMarked
// have decompose set.
func TestSimplify(t *testing.T) {
	b := newBuilder(t, simplifyTest)
	root := &b.root
	simplify(root)

	for idx, tc := range simplifyTest {
		if simplifyRemoved.contains(tc.str) {
			continue
		}
		found := root.find(tc.str)
		if found.str == tc.str && equalCEArrays(found.elems, tc.ces) {
			continue
		}
		// Stop at the first mismatch.
		t.Errorf("%d: found element %s -> %X; want %s -> %X", idx, found.str, found.elems, tc.str, tc.ces)
		break
	}

	// Walk the entries and compare each decompose flag against
	// simplifyMarked, counting how many marked entries are seen.
	pos, marked := 0, 0
	cur := root.front()
	for cur != nil {
		want := simplifyMarked.contains(cur.str)
		if want {
			marked++
		}
		if cur.decompose != want {
			t.Errorf("%d: %s has decompose %v; want %v", pos, cur.str, cur.decompose, want)
		}
		pos++
		cur, _ = cur.nextIndexed()
	}
	if marked != len(simplifyMarked) {
		t.Errorf(" an entry that should be marked as decompose was deleted")
	}
}

// expandTest is the table of multi-element entries used by TestExpand.
var expandTest = []ducetElem{
	{str: "\u0300", ces: append(scCE(29, 230), scCE(30, 230)...)},
	{str: "\u00C0", ces: append(ptCE(100, 8), scCE(30, 230)...)},
	{str: "\u00C8", ces: append(ptCE(105, 8), scCE(30, 230)...)},
	// identical expansion
	{str: "\u00C9", ces: append(ptCE(105, 8), scCE(30, 230)...)},
	{str: "\u05F2", ces: append(ptCE(200, 4), ptCE(200, 4)[0], ptCE(200, 4)[0])},
	{str: "\u01FF", ces: append(ptCE(200, 4), ptcCE(201, 4, 0)[0], scCE(30, 230)[0])},
}

// TestExpand runs processExpansions on a table built from expandTest and
// checks the stored expansion of every entry as well as the total size of the
// expansion table.
func TestExpand(t *testing.T) {
	const (
		// Six entries are added, but two of them have an identical
		// expansion, so five distinct expansions are expected.
		totalExpansions = 5
		// Lengths of the distinct expansions plus one length slot each.
		totalElements = 2 + 2 + 2 + 3 + 3 + totalExpansions
	)
	b := newBuilder(t, expandTest)
	root := &b.root
	b.processExpansions(root)

	// The entries are walked in step with expandTest.
	cur := root.front()
	for _, tc := range expandTest {
		first := []rune(tc.str)[0]
		// An expansion is stored as its length followed by its elements.
		stored := b.t.ExpandElem[cur.expansionIndex:]
		if n := stored[0]; int(n) != len(tc.ces) {
			t.Errorf("%U: len(expansion)==%d; want %d", first, n, len(tc.ces))
		}
		stored = stored[1:]
		for j := range tc.ces {
			want, _ := makeCE(tc.ces[j])
			if got := stored[j]; got != want {
				t.Errorf("%U: element %d is %X; want %X", first, j, got, want)
			}
		}
		cur, _ = cur.nextIndexed()
	}
	// Verify uniquing.
	if got := len(b.t.ExpandElem); got != totalElements {
		t.Errorf("len(expandElem)==%d; want %d", got, totalElements)
	}
}

// contractTest is the table of contractions used by TestContract.
var contractTest = []ducetElem{
	{str: "abc", ces: pCE(102)},
	{str: "abd", ces: pCE(103)},
	{str: "a", ces: pCE(100)},
	{str: "ab", ces: pCE(101)},
	{str: "ac", ces: pCE(104)},
	{str: "bcd", ces: pCE(202)},
	{str: "b", ces: pCE(200)},
	{str: "bc", ces: pCE(201)},
	{str: "bd", ces: pCE(203)},
	// shares suffixes with a*
	{str: "Ab", ces: pCE(301)},
	{str: "A", ces: pCE(300)},
	{str: "Ac", ces: pCE(304)},
	{str: "Abc", ces: pCE(302)},
	{str: "Abd", ces: pCE(303)},
	// starter to be ignored
	{str: "z", ces: pCE(1000)},
}

// TestContract runs processContractions on a table built from contractTest
// and checks that:
//   - exactly two distinct contraction tries are referenced,
//   - looking up the remainder of every test string (everything after its
//     first rune) consumes the whole remainder and yields the expected
//     collation element, and
//   - the contraction element table has the expected total size.
func TestContract(t *testing.T) {
	const (
		// Sum of the sizes of the a*, A* and b* groups in contractTest
		// (5, 5 and 4 entries).
		totalElements = 5 + 5 + 4
	)
	b := newBuilder(t, contractTest)
	o := &b.root
	b.processContractions(o)

	// Collect the entries that carry a contraction handle, keyed by their
	// first rune, and the set of distinct trie indexes they refer to.
	indexMap := make(map[int]bool)
	handleMap := make(map[rune]*entry)
	for e := o.front(); e != nil; e, _ = e.nextIndexed() {
		if e.contractionHandle.n > 0 {
			handleMap[e.runes[0]] = e
			indexMap[e.contractionHandle.index] = true
		}
	}
	// Verify uniquing.
	if len(indexMap) != 2 {
		t.Errorf("number of tries is %d; want %d", len(indexMap), 2)
	}
	for _, tt := range contractTest {
		runes := []rune(tt.str)
		e, ok := handleMap[runes[0]]
		if !ok {
			continue
		}
		// The handle is keyed by the first rune, so drop exactly that rune
		// (not just one byte) to obtain the suffix to look up.
		str := string(runes[1:])
		offset, n := lookup(&b.t.ContractTries, e.contractionHandle, []byte(str))
		if len(str) != n {
			t.Errorf("%s: bytes consumed==%d; want %d", tt.str, n, len(str))
		}
		ce := b.t.ContractElem[offset+e.contractionIndex]
		if want, _ := makeCE(tt.ces[0]); want != ce {
			t.Errorf("%s: element %X; want %X", tt.str, ce, want)
		}
	}
	if len(b.t.ContractElem) != totalElements {
		t.Errorf("len(contractElem)==%d; want %d", len(b.t.ContractElem), totalElements)
	}
}