Newer
Older
pokemon-go-trade / vendor / golang.org / x / text / message / pipeline / pipeline.go
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package pipeline provides tools for creating translation pipelines.
//
// NOTE: UNDER DEVELOPMENT. API MAY CHANGE.
package pipeline

import (
	"bytes"
	"encoding/json"
	"fmt"
	"go/build"
	"go/parser"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"text/template"
	"unicode"

	"golang.org/x/text/internal"
	"golang.org/x/text/language"
	"golang.org/x/text/runes"
	"golang.org/x/tools/go/loader"
)

// File names and suffixes used for translation files.
const (
	// NOTE(review): extractFile and outFile are not referenced in the code
	// visible in this file; presumably they name the extraction output and
	// the per-language out file elsewhere in the package — confirm.
	extractFile  = "extracted.gotext.json"
	outFile      = "out.gotext.json"
	// gotextSuffix is the fallback file extension used by outPattern and the
	// suffix that the default translation file pattern (transRE) looks for.
	gotextSuffix = "gotext.json"
)

// Config contains configuration for the translation pipeline.
type Config struct {
	// Supported indicates the languages for which data should be generated.
	// The default is to support all locales for which there are matching
	// translation files.
	Supported []language.Tag

	// --- Extraction

	// SourceLanguage is the language of the messages as they appear in the
	// source. Merge copies the original message text as the translation for
	// this language when no translation is available, and Import uses it as
	// the initial language tag when walking Dir.
	SourceLanguage language.Tag

	// Packages is not referenced in this file.
	// NOTE(review): presumably the packages to extract messages from —
	// confirm against the extraction code.
	Packages []string

	// --- File structure

	// Dir is the root dir for all operations. If empty, "./locales" is used.
	Dir string

	// TranslationsPattern is a regular expression to match incoming translation
	// files. These files may appear in any directory rooted at Dir.
	// If empty, the package default pattern (transRE) is used.
	// The language for the translation files is determined as follows:
	//   1. From the Language field in the file.
	//   2. If not present, from a valid language tag in the filename, separated
	//      by dots (e.g. "en-US.json" or "incoming.pt_PT.xmb").
	//   3. If not present, from the closest subdirectory in which the file
	//      is contained that parses as a valid language tag.
	TranslationsPattern string

	// OutPattern defines the location for translation files for a certain
	// language. The default is "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
	OutPattern string

	// Format defines the file format for generated translation files.
	// The default is XMB. Alternatives are GetText, XLIFF, L20n, GoText.
	// In the code of this file it is only used as the fallback value for Ext.
	Format string

	// Ext is the value substituted for {{.Ext}} in OutPattern. If empty,
	// Format is used; if that is empty as well, "gotext.json" is used.
	Ext string

	// TODO:
	// Actions are additional actions to be performed after the initial extract
	// and merge.
	// Actions []struct {
	// 	Name    string
	// 	Options map[string]string
	// }

	// --- Generation

	// GenFile may be in a different package. If it is not defined, the
	// generated output will be written to stdout.
	GenFile string

	// GenPackage is the package or relative path into which to generate the
	// file. If not specified it is relative to the current directory.
	GenPackage string

	// DeclareVar defines a variable to which to assign the generated Catalog.
	DeclareVar string

	// SetDefault determines whether to assign the generated Catalog to
	// message.DefaultCatalog. The default for this is true if DeclareVar is
	// not defined, false otherwise.
	SetDefault bool

	// TODO:
	// - Printf-style configuration
	// - Template-style configuration
	// - Extraction options
	// - Rewrite options
	// - Generation options
}

// Operations:
// - extract:       get the strings
// - disambiguate:  find messages with the same key, but possibly different meaning.
// - create out:    create a list of messages that need translations
// - load trans:    load the list of current translations
// - merge:         assign list of translations as done
// - (action)expand:    analyze features and create example sentences for each version.
// - (action)googletrans:   pre-populate messages with automatic translations.
// - (action)export:    send out messages somewhere non-standard
// - (action)import:    load messages from somewhere non-standard
// - vet program:   don't pass "foo" + var + "bar" strings. Not using funcs for translated strings.
// - vet trans:     coverage: all translations/ all features.
// - generate:      generate Go code

// State holds all accumulated information on translations during processing.
type State struct {
	// Config is the pipeline configuration consulted by Import, Merge and
	// Export.
	Config Config

	// NOTE(review): Package and program are not used in the code visible in
	// this file; presumably they are set by the extraction step from the
	// loaded program — confirm.
	Package string
	program *loader.Program

	// Extracted holds the messages that Merge deduplicates and combines with
	// the imported translations.
	Extracted Messages `json:"messages"`

	// Messages includes all messages for which there need to be translations.
	// Duplicates may be eliminated. Generation will be done from these messages
	// (usually after merging). Merge appends one entry per language and
	// Export writes one file per entry.
	Messages []Messages

	// Translations are incoming translations for the application messages.
	// Import appends one entry per translation file it reads.
	Translations []Messages
}

// dir returns the root directory for all operations: Config.Dir when it is
// set and "./locales" otherwise.
func (s *State) dir() string {
	if s.Config.Dir == "" {
		return "./locales"
	}
	return s.Config.Dir
}

// outPattern expands Config.OutPattern into a fmt-style pattern for the
// location of the translation out files.
//
// The returned string contains a single "%s" verb in place of the language;
// callers obtain the path for a language with fmt.Sprintf(pattern, tag).
// When Config.OutPattern is empty, "{{.Dir}}/{{.Language}}/out.{{.Ext}}" is
// used. Ext falls back to Config.Format and then to gotextSuffix. Slashes in
// the result are converted to the OS-specific separator.
//
// An error is returned, together with an empty string, if the pattern is not
// a valid text/template or cannot be executed.
func outPattern(s *State) (string, error) {
	c := s.Config
	pat := c.OutPattern
	if pat == "" {
		pat = "{{.Dir}}/{{.Language}}/out.{{.Ext}}"
	}

	ext := c.Ext
	if ext == "" {
		ext = c.Format
	}
	if ext == "" {
		ext = gotextSuffix
	}
	t, err := template.New("").Parse(pat)
	if err != nil {
		return "", wrap(err, "error parsing template")
	}

	// The result is used as a fmt format string, so a literal '%' coming
	// from Dir or Ext must be doubled; otherwise it would be interpreted as
	// the start of a verb and corrupt the generated path.
	escape := func(v string) string { return strings.Replace(v, "%", "%%", -1) }

	var buf bytes.Buffer
	err = t.Execute(&buf, map[string]string{
		"Dir":      escape(s.dir()),
		"Language": "%s",
		"Ext":      escape(ext),
	})
	if err != nil {
		// Do not hand back a partially rendered pattern.
		return "", wrap(err, "incorrect OutPattern")
	}
	return filepath.FromSlash(buf.String()), nil
}

// transRE is the default pattern for translation file names; Import uses it
// when Config.TranslationsPattern is empty. The suffix is quoted so that the
// '.' in it matches a literal dot rather than any character.
var transRE = regexp.MustCompile(`.*\.` + regexp.QuoteMeta(gotextSuffix))

// Import loads existing translation files.
//
// It walks the directory returned by s.dir(), starting with
// Config.SourceLanguage as the language tag, and hands every file whose name
// matches Config.TranslationsPattern (or transRE when that is empty) to the
// importer. An error is returned if the out pattern or the translations
// pattern is invalid, or if the walk fails.
func (s *State) Import() error {
	pattern, err := outPattern(s)
	if err != nil {
		return err
	}

	matcher := transRE
	if custom := s.Config.TranslationsPattern; custom != "" {
		matcher, err = regexp.Compile(custom)
		if err != nil {
			return wrapf(err, "error parsing regexp %q", custom)
		}
	}

	imp := &importer{state: s, outPattern: pattern, transFile: matcher}
	return imp.walkImport(s.dir(), s.Config.SourceLanguage)
}

// importer carries the state needed while walking a directory tree for
// translation files.
type importer struct {
	// state receives the translations that are read.
	state      *State
	// outPattern is a fmt pattern with a "%s" verb for the language; files
	// whose path equals the expanded pattern are skipped.
	outPattern string
	// transFile is matched against file names (not full paths) to select
	// the files to import.
	transFile  *regexp.Regexp
}

// walkImport recursively scans path for translation files and appends the
// contents of every matching file to i.state.Translations.
//
// tag is the language inherited from the enclosing directories. It is
// replaced by the name of a subdirectory, or by a dot-separated component of
// a file name, whenever that name parses as a valid language tag (the last
// valid component of a file name wins). A file that does not specify a
// language itself is assigned the tag determined this way.
//
// Files whose path equals the out file for their language and files whose
// name does not match i.transFile are skipped. A directory that cannot be
// read is treated as empty; read and JSON parse failures of a selected file
// are returned as errors.
func (i *importer) walkImport(path string, tag language.Tag) error {
	files, err := ioutil.ReadDir(path)
	if err != nil {
		// Best effort: an unreadable or missing directory contributes no
		// translations.
		return nil
	}
	for _, f := range files {
		name := f.Name()
		tag := tag // per-entry copy; siblings must not see each other's tag
		if f.IsDir() {
			if t, err := language.Parse(name); err == nil {
				tag = t
			}
			if err := i.walkImport(filepath.Join(path, name), tag); err != nil {
				return err
			}
			continue
		}
		for _, l := range strings.Split(name, ".") {
			if t, err := language.Parse(l); err == nil {
				tag = t
			}
		}
		file := filepath.Join(path, name)
		// TODO: Should we skip files that match output files?
		if fmt.Sprintf(i.outPattern, tag) == file {
			continue
		}
		// TODO: handle different file formats.
		if !i.transFile.MatchString(name) {
			continue
		}
		b, err := ioutil.ReadFile(file)
		if err != nil {
			return wrap(err, "read file failed")
		}
		var translations Messages
		if err := json.Unmarshal(b, &translations); err != nil {
			return wrap(err, "parsing translation file failed")
		}
		// The file's own Language field takes precedence. Only when it is
		// absent (undetermined) fall back to the tag inferred from the file
		// name or the enclosing directories.
		if translations.Language == language.Und {
			translations.Language = tag
		}
		i.state.Translations = append(i.state.Translations, translations)
	}
	return nil
}

// Merge merges the extracted messages with the existing translations.
//
// The extracted messages are deduplicated, first by key and then by shared
// ID, and combined with the translations loaded into s.Translations. One
// Messages value per language is appended to s.Messages; the languages are
// Config.Supported followed by the languages of the loaded translations,
// passed through internal.UniqueTags.
//
// Merge panics if s.Messages is already non-nil. It always returns nil.
func (s *State) Merge() error {
	if s.Messages != nil {
		panic("already merged")
	}

	// Step 1: keep the first message for every key. Later messages with the
	// same key are dropped; a warning is logged when they are not equivalent
	// to the one that was kept. Duplicates are okay as long as the
	// substitution arguments are okay as well.
	var firstPerKey []*Message
	seenKeys := map[string]*Message{}
	for _, extracted := range s.Extracted.Messages {
		cur := extracted // private copy, so the stored pointer stays stable
		prev, dup := seenKeys[cur.Key]
		if !dup {
			firstPerKey = append(firstPerKey, &cur)
			seenKeys[cur.Key] = &cur
			continue
		}
		if err := checkEquivalence(&cur, prev); err != nil {
			warnf("Key %q matches conflicting messages: %v and %v", cur.Key, prev.ID, cur.ID)
			// TODO: track enough information so that the rewriter can
			// suggest/disambiguate messages.
		}
		// TODO: add position to message.
	}

	// Step 2: messages with different keys may still refer to the same
	// translated message (e.g. different whitespace). Drop every message
	// that shares an ID with an earlier one. All IDs of a message are
	// checked before any of them is recorded.
	seenIDs := map[string]bool{}
	var distinct []*Message
	for _, msg := range firstPerKey {
		shared := false
		for _, id := range msg.ID {
			if seenIDs[id] {
				shared = true
				break
			}
		}
		if !shared {
			distinct = append(distinct, msg)
		}
		for _, id := range msg.ID {
			seenIDs[id] = true
		}
	}

	// Step 3: index the non-empty translations by language and message ID.
	// A later translation for the same ID replaces an earlier one.
	byLang := map[language.Tag]map[string]Message{}
	langs := append([]language.Tag{}, s.Config.Supported...)
	for _, file := range s.Translations {
		perLang, known := byLang[file.Language]
		if !known {
			perLang = map[string]Message{}
			byLang[file.Language] = perLang
			langs = append(langs, file.Language)
		}
		for _, tm := range file.Messages {
			if tm.Translation.IsEmpty() {
				continue
			}
			for _, id := range tm.ID {
				if _, exists := perLang[id]; exists {
					warnf("Duplicate translation in locale %q for message %q", file.Language, id)
				}
				perLang[id] = tm
			}
		}
	}
	langs = internal.UniqueTags(langs)

	// Step 4: build the message list for every language.
	for _, tag := range langs {
		merged := Messages{Language: tag}
		available := byLang[tag] // nil for languages without translations
		for _, src := range distinct {
			msg := *src
			msg.Key = ""
			msg.Position = ""

			// Use the translation of the first ID that has one.
			for _, id := range msg.ID {
				tr, ok := available[id]
				if !ok {
					continue
				}
				msg.Translation = tr.Translation
				if tr.TranslatorComment != "" {
					msg.TranslatorComment = tr.TranslatorComment
					msg.Fuzzy = tr.Fuzzy
				}
				break
			}

			// The source language falls back to the original message text.
			if tag == s.Config.SourceLanguage && msg.Translation.IsEmpty() {
				msg.Translation = msg.Message
				if msg.TranslatorComment == "" {
					msg.TranslatorComment = "Copied from source."
					msg.Fuzzy = true
				}
			}
			// TODO: if translation is empty: pre-expand based on available
			// linguistic features. This may also be done as a plugin.
			merged.Messages = append(merged.Messages, msg)
		}
		s.Messages = append(s.Messages, merged)
	}
	return nil
}

// Export writes out the messages to translation out files.
//
// Every entry of s.Messages is encoded as indented JSON and written to the
// path obtained by expanding the out pattern with the entry's language.
// Missing parent directories are created. The first failure aborts the
// export and is returned.
func (s *State) Export() error {
	pattern, err := outPattern(s)
	if err != nil {
		return wrap(err, "export failed")
	}
	for _, msgs := range s.Messages {
		// TODO: inject translations from existing files to avoid retranslation.
		encoded, err := json.MarshalIndent(msgs, "", "    ")
		if err != nil {
			return wrap(err, "JSON marshal failed")
		}
		target := fmt.Sprintf(pattern, msgs.Language)
		if err = os.MkdirAll(filepath.Dir(target), 0755); err != nil {
			return wrap(err, "dir create failed")
		}
		if err = ioutil.WriteFile(target, encoded, 0644); err != nil {
			return wrap(err, "write failed")
		}
	}
	return nil
}

// ws and notWS are rune predicates built from unicode.White_Space: ws is the
// membership test of the set of white space runes, notWS that of its
// complement. trimWS uses them to split off leading and trailing white space.
var (
	ws    = runes.In(unicode.White_Space).Contains
	notWS = runes.NotIn(unicode.White_Space).Contains
)

// trimWS splits s into its leading white space, its trailing white space and
// the text in between, so that leadWS + trimmed + trailWS == s.
//
// If s consists of white space only, all of it is reported as trailWS and
// both trimmed and leadWS are empty.
func trimWS(s string) (trimmed, leadWS, trailWS string) {
	withoutTail := strings.TrimRightFunc(s, ws)

	// Index of the first non-space rune; there is no leading white space
	// when it is the very first rune or when nothing is left at all.
	start := strings.IndexFunc(withoutTail, notWS)
	if start < 0 {
		start = 0
	}
	return withoutTail[start:], withoutTail[:start], s[len(withoutTail):]
}

// Error helpers used throughout the package.
//
// NOTE: The command line tool already prefixes with "gotext:".
var (
	// wrap prefixes the text of err with msg, formatted as "msg: err".
	// A nil err yields nil.
	wrap = func(err error, msg string) error {
		if err != nil {
			return fmt.Errorf("%s: %v", msg, err)
		}
		return nil
	}
	// wrapf is like wrap, but builds the prefix from a format and arguments.
	wrapf = func(err error, msg string, args ...interface{}) error {
		if err != nil {
			return wrap(err, fmt.Sprintf(msg, args...))
		}
		return nil
	}
	// errorf creates a new formatted error.
	errorf = fmt.Errorf
)

// warnf reports a non-fatal problem. The message is formatted as with
// fmt.Printf and written through the standard logger of package log.
func warnf(format string, args ...interface{}) {
	// TODO: don't log.
	log.Printf(format, args...)
}

// loadPackages loads, parses and type-checks the packages named by args
// using conf. An empty args selects the package in the current directory.
//
// conf is modified: its Build field is set to &build.Default and its
// ParserMode to parser.ParseComments. Any arguments that conf.FromArgs
// leaves unconsumed are ignored.
func loadPackages(conf *loader.Config, args []string) (*loader.Program, error) {
	pkgs := args
	if len(pkgs) == 0 {
		pkgs = []string{"."}
	}

	conf.Build, conf.ParserMode = &build.Default, parser.ParseComments

	// Use the initial packages from the command line.
	if _, err := conf.FromArgs(pkgs, false); err != nil {
		return nil, wrap(err, "loading packages failed")
	}

	// Load, parse and type-check the whole program.
	return conf.Load()
}