Newer
Older
pokemon-go-trade / vendor / golang.org / x / text / message / pipeline / extract.go
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pipeline

import (
	"bytes"
	"errors"
	"fmt"
	"go/ast"
	"go/constant"
	"go/format"
	"go/token"
	"go/types"
	"path/filepath"
	"strings"
	"unicode"
	"unicode/utf8"

	fmtparser "golang.org/x/text/internal/format"
	"golang.org/x/tools/go/callgraph"
	"golang.org/x/tools/go/callgraph/cha"
	"golang.org/x/tools/go/loader"
	"golang.org/x/tools/go/ssa"
	"golang.org/x/tools/go/ssa/ssautil"
)

const debug = false

// TODO:
// - merge information into existing files
// - handle different file formats (PO, XLIFF)
// - handle features (gender, plural)
// - message rewriting

// - `msg:"etc"` tags

// Extract extracts all strings from the packages defined in Config.
//
// It loads the packages, seeds the known message.Printer endpoints, collects
// the messages reachable from them and returns the result as a State. An
// error is returned when loading fails or when the endpoints cannot be
// seeded.
func Extract(c *Config) (*State, error) {
	x, err := newExtracter(c)
	if err != nil {
		return nil, wrap(err, "")
	}
	if err = x.seedEndpoints(); err != nil {
		return nil, err
	}

	x.extractMessages()

	state := &State{
		Config:  *c,
		program: x.iprog,
	}
	state.Extracted = Messages{
		Language: c.SourceLanguage,
		Messages: x.messages,
	}
	return state, nil
}

// extracter holds the loaded program, its SSA form and call graph, together
// with the data collected while looking for translatable strings.
type extracter struct {
	// conf is the loader configuration handed to loadPackages.
	conf loader.Config
	// iprog is the program returned by loadPackages.
	iprog *loader.Program
	// prog is the SSA program created from iprog.
	prog *ssa.Program
	// callGraph is the CHA call graph computed for prog.
	callGraph *callgraph.Graph

	// Calls and other expressions to collect.
	// globals is keyed by the position of a global, funcs by the position of
	// a call site; messages accumulates the extracted messages.
	globals  map[token.Pos]*constData
	funcs    map[token.Pos]*callData
	messages []Message
}

// newExtracter loads the packages listed in c, builds their SSA form and
// computes a CHA call graph over the result. It returns a wrapped error when
// the packages cannot be loaded.
func newExtracter(c *Config) (x *extracter, err error) {
	// The zero value of conf is the empty loader.Config; loadPackages fills
	// it in through the pointer it receives.
	x = &extracter{
		globals: make(map[token.Pos]*constData),
		funcs:   make(map[token.Pos]*callData),
	}

	if x.iprog, err = loadPackages(&x.conf, c.Packages); err != nil {
		return nil, wrap(err, "")
	}

	const buildMode = ssa.GlobalDebug | ssa.BareInits
	x.prog = ssautil.CreateProgram(x.iprog, buildMode)
	x.prog.Build()
	x.callGraph = cha.CallGraph(x.prog)

	return x, nil
}

// globalData returns the constData registered for the global at pos,
// creating and registering an empty one when none exists yet.
func (x *extracter) globalData(pos token.Pos) *constData {
	if existing := x.globals[pos]; existing != nil {
		return existing
	}
	fresh := &constData{}
	x.globals[pos] = fresh
	return fresh
}

// seedEndpoints registers the formatting methods of message.Printer as the
// starting points of the extraction. It first processes global variables and
// then handles the callers of Printf, Sprintf and Fprintf, in that order. An
// error is returned when golang.org/x/text/message is not part of the loaded
// program.
func (x *extracter) seedEndpoints() error {
	pkgInfo := x.iprog.Package("golang.org/x/text/message")
	if pkgInfo == nil {
		return errors.New("pipeline: golang.org/x/text/message is not imported")
	}
	pkg := x.prog.Package(pkgInfo.Pkg)
	typ := types.NewPointer(pkg.Type("Printer").Type())

	x.processGlobalVars()

	// Positions are indices into the SSA parameter list of each method.
	endpoints := []struct {
		method    string
		formatPos int
		argPos    int
	}{
		{method: "Printf", formatPos: 1, argPos: 2},
		{method: "Sprintf", formatPos: 1, argPos: 2},
		{method: "Fprintf", formatPos: 2, argPos: 3},
	}
	for _, ep := range endpoints {
		fn := x.prog.LookupMethod(typ, pkg.Pkg, ep.method)
		x.handleFunc(fn, &callData{
			formatPos: ep.formatPos,
			argPos:    ep.argPos,
			isMethod:  true,
		})
	}
	return nil
}

// processGlobalVars finds string constants that are assigned to global
// variables.
//
// It scans the "init" member of every package for stores into globals whose
// type, after removing all pointer indirections, is string, and hands the
// stored value to visitInit.
func (x *extracter) processGlobalVars() {
	for _, pkg := range x.prog.AllPackages() {
		member, found := pkg.Members["init"]
		if !found {
			continue
		}
		initFn := member.(*ssa.Function)
		for _, block := range initFn.Blocks {
			for _, instr := range block.Instrs {
				store, isStore := instr.(*ssa.Store)
				if !isStore {
					continue
				}
				global, isGlobal := store.Addr.(*ssa.Global)
				if !isGlobal {
					continue
				}

				// Peel off every level of pointer indirection.
				elem := global.Type()
				for ptr, isPtr := elem.(*types.Pointer); isPtr; ptr, isPtr = elem.(*types.Pointer) {
					elem = ptr.Elem()
				}

				basic, isBasic := elem.(*types.Basic)
				if isBasic && basic.Kind() == types.String {
					x.visitInit(global, store.Val)
				}
			}
		}
	}
}

// constData collects what is known about a single global variable: the
// constants assigned to it, the other globals it was assigned from, and the
// call it was used in as a format string, if any.
type constData struct {
	call   *callData // to provide a signature for the constants
	values []constVal
	others []token.Pos // Assigned to other global data.
}

// visit calls f for every constant recorded in d and then, recursively, for
// the constants of every global listed in d.others that is registered in
// x.globals.
func (d *constData) visit(x *extracter, f func(c constant.Value)) {
	for _, cv := range d.values {
		f(cv.value)
	}
	for _, pos := range d.others {
		other, found := x.globals[pos]
		if !found {
			continue
		}
		other.visit(x, f)
	}
}

// constVal pairs a constant with the position it was found at.
type constVal struct {
	value constant.Value
	pos   token.Pos
}

// callData describes a call site that passes a format string, directly or
// through wrapper functions, to one of the seeded endpoints.
type callData struct {
	// call is the SSA call instruction, expr the matching AST call
	// expression (filled in by extractMessages), and formats the constant
	// format strings found for this call.
	call    ssa.CallInstruction
	expr    *ast.CallExpr
	formats []constant.Value

	// callee is the data of the function being called. formatPos and argPos
	// are parameter indices; both are initialised to -1 for new call sites
	// in handleFunc.
	callee    *callData
	isMethod  bool
	formatPos int
	argPos    int   // varargs at this position in the call
	argTypes  []int // arguments extractable from this position
}

// callFormatPos returns the callee's format position, reduced by one when
// the callee is a method (presumably to skip the receiver, which is not part
// of the AST argument list — confirm against callers).
func (c *callData) callFormatPos() int {
	callee := c.callee
	pos := callee.formatPos
	if callee.isMethod {
		pos--
	}
	return pos
}

// callArgsStart returns the callee's argument position, reduced by one when
// the callee is a method (presumably to skip the receiver, which is not part
// of the AST argument list — confirm against callers).
func (c *callData) callArgsStart() int {
	callee := c.callee
	start := callee.argPos
	if callee.isMethod {
		start--
	}
	return start
}

// Pos returns the position reported by the underlying SSA call instruction.
func (c *callData) Pos() token.Pos {
	return c.call.Pos()
}

// Pkg returns the types package of the function containing the call.
func (c *callData) Pkg() *types.Package {
	fn := c.call.Parent()
	return fn.Pkg.Pkg
}

// handleFunc walks all incoming call-graph edges of f. Every call site that
// has a position and has not been recorded yet is registered in x.funcs with
// fd as its callee, after which the arguments at fd's argument and format
// positions are traced back to their sources.
func (x *extracter) handleFunc(f *ssa.Function, fd *callData) {
	for _, edge := range x.callGraph.Nodes[f].In {
		if edge.Pos() == 0 {
			continue
		}

		site := edge.Site
		if x.funcs[site.Pos()] != nil {
			// TODO: theoretically a format string could be passed to multiple
			// arguments of a function. Support this eventually.
			continue
		}
		x.debug(site, "CALL", f.String())

		caller := &callData{
			call:      site,
			callee:    fd,
			formatPos: -1,
			argPos:    -1,
		}
		x.funcs[site.Pos()] = caller

		common := site.Common()
		// Offset by one if we are invoking an interface method.
		shift := 0
		if common.IsInvoke() {
			shift = -1
		}
		if fd.argPos >= 0 {
			x.visitArgs(caller, common.Args[fd.argPos+shift])
		}
		x.visitFormats(caller, common.Args[fd.formatPos+shift])
	}
}

// posser is the subset of an SSA value or instruction that debug needs: a
// source position and the enclosing function.
type posser interface {
	Pos() token.Pos
	Parent() *ssa.Function
}

// debug prints a trace line for v when the debug constant is enabled and
// does nothing otherwise. The line holds the position of v, the header
// (indented unless it is "CALL" or "INSERT"), the dynamic type of v and the
// extra args.
func (x *extracter) debug(v posser, header string, args ...interface{}) {
	if !debug {
		return
	}

	var where string
	if fn := v.Parent(); fn != nil {
		where = posString(&x.conf, fn.Package().Pkg, v.Pos())
	}

	switch header {
	case "CALL", "INSERT":
		// Top-level events are printed flush left.
	default:
		header = "  " + header
	}

	location := where + fmt.Sprintf("@%d", v.Pos())
	fmt.Printf("%-32s%-10s%-15T ", location, header, v)
	for _, extra := range args {
		fmt.Printf(" %v", extra)
	}
	fmt.Println()
}

// visitInit evaluates and collects values assigned to global variables in an
// init function.
//
// Starting at v it walks back through the SSA value graph and records, in the
// constData of global, every string constant containing letters and the
// position of every other global it reaches. Each SSA value is processed at
// most once per call, so cyclic value graphs cannot cause unbounded
// recursion.
func (x *extracter) visitInit(global *ssa.Global, v ssa.Value) {
	x.visitInitValue(global, v, map[ssa.Value]bool{})
}

// visitInitValue is the recursive worker behind visitInit. seen holds the
// values that were already processed during the current walk.
func (x *extracter) visitInitValue(global *ssa.Global, v ssa.Value, seen map[ssa.Value]bool) {
	if v == nil || seen[v] {
		return
	}
	seen[v] = true
	x.debug(v, "GLOBAL", v)

	switch v := v.(type) {
	case *ssa.Phi:
		for _, e := range v.Edges {
			x.visitInitValue(global, e, seen)
		}

	case *ssa.Const:
		// constant.StringVal panics for anything that is neither a String
		// nor Unknown, so skip nil and non-string constants up front.
		if v.Value == nil || v.Value.Kind() != constant.String {
			break
		}
		// Only record strings with letters.
		if isMsg(constant.StringVal(v.Value)) {
			cd := x.globalData(global.Pos())
			cd.values = append(cd.values, constVal{value: v.Value, pos: v.Pos()})
		}
		// TODO: handle %m-directive.

	case *ssa.Global:
		// A global reached from its own initializer would make
		// constData.visit follow itself forever; there is nothing new to
		// learn from it, so skip it.
		if v == global {
			break
		}
		cd := x.globalData(global.Pos())
		cd.others = append(cd.others, v.Pos())

	case *ssa.FieldAddr, *ssa.Field:
		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
		// an example args as to give parameters for the translator.

	case *ssa.Slice:
		if v.Low == nil && v.High == nil && v.Max == nil {
			x.visitInitValue(global, v.X, seen)
		}

	case *ssa.Alloc:
		// Follow the instructions that refer to the allocation (for example
		// the stores into it) when referrer information is available.
		refs := v.Referrers()
		if refs == nil {
			break
		}
		for _, r := range *refs {
			values := []ssa.Value{}
			for _, o := range r.Operands(nil) {
				if o == nil || *o == v {
					continue
				}
				values = append(values, *o)
			}
			// TODO: return something different if we care about multiple
			// values as well.
			if len(values) == 1 {
				x.visitInitValue(global, values[0], seen)
			}
		}

	case ssa.Instruction:
		// Look through instructions with a single operand.
		rands := v.Operands(nil)
		if len(rands) == 1 && rands[0] != nil {
			x.visitInitValue(global, *rands[0], seen)
		}
	}
}

// visitFormats finds the original source of the format value v passed at the
// call site described by call.
//
// Constant strings containing letters are appended to call.formats, globals
// are linked to call, and when the value originates from a parameter of the
// enclosing function the parameter index is stored in call.formatPos and the
// callers of that function are processed in turn. Each SSA value is processed
// at most once per call, so cyclic value graphs (for example phi nodes of a
// loop that refer to each other) cannot cause unbounded recursion.
func (x *extracter) visitFormats(call *callData, v ssa.Value) {
	x.visitFormatsValue(call, v, map[ssa.Value]bool{})
}

// visitFormatsValue is the recursive worker behind visitFormats. seen holds
// the values that were already processed during the current walk.
func (x *extracter) visitFormatsValue(call *callData, v ssa.Value, seen map[ssa.Value]bool) {
	if v == nil || seen[v] {
		return
	}
	seen[v] = true
	x.debug(v, "VALUE", v)

	switch v := v.(type) {
	case *ssa.Phi:
		for _, e := range v.Edges {
			x.visitFormatsValue(call, e, seen)
		}

	case *ssa.Const:
		// constant.StringVal panics for anything that is neither a String
		// nor Unknown, so skip nil and non-string constants up front.
		if v.Value == nil || v.Value.Kind() != constant.String {
			break
		}
		// Only record strings with letters.
		if isMsg(constant.StringVal(v.Value)) {
			x.debug(call.call, "FORMAT", v.Value.ExactString())
			call.formats = append(call.formats, v.Value)
		}
		// TODO: handle %m-directive.

	case *ssa.Global:
		x.globalData(v.Pos()).call = call

	case *ssa.FieldAddr, *ssa.Field:
		// TODO: mark field index v.Field of v.X.Type() for extraction. extract
		// an example args as to give parameters for the translator.

	case *ssa.Slice:
		if v.Low == nil && v.High == nil && v.Max == nil {
			x.visitFormatsValue(call, v.X, seen)
		}

	case *ssa.Parameter:
		// TODO: handle the function for the index parameter.
		f := v.Parent()
		for i, p := range f.Params {
			if p != v {
				continue
			}
			if call.formatPos < 0 {
				call.formatPos = i
				// TODO: is there a better way to detect this is calling
				// a method rather than a function?
				call.isMethod = len(f.Params) > f.Signature.Params().Len()
				x.handleFunc(f, call)
			} else if debug && i != call.formatPos {
				// TODO: support this.
				fmt.Printf("WARNING:%s: format string passed to arg %d and %d\n",
					posString(&x.conf, call.Pkg(), call.Pos()),
					call.formatPos, i)
			}
		}

	case *ssa.Alloc:
		// Follow the instructions that refer to the allocation (for example
		// the stores into it) when referrer information is available.
		refs := v.Referrers()
		if refs == nil {
			break
		}
		for _, r := range *refs {
			values := []ssa.Value{}
			for _, o := range r.Operands(nil) {
				if o == nil || *o == v {
					continue
				}
				values = append(values, *o)
			}
			// TODO: return something different if we care about multiple
			// values as well.
			if len(values) == 1 {
				x.visitFormatsValue(call, values[0], seen)
			}
		}

		// TODO:
	// case *ssa.Index:
	// 	// Get all values in the array if applicable
	// case *ssa.IndexAddr:
	// 	// Get all values in the slice or *array if applicable.
	// case *ssa.Lookup:
	// 	// Get all values in the map if applicable.

	case *ssa.FreeVar:
		// TODO: find the link between free variables and parameters:
		//
		// func freeVar(p *message.Printer, str string) {
		// 	fn := func(p *message.Printer) {
		// 		p.Printf(str)
		// 	}
		// 	fn(p)
		// }

	case *ssa.Call:
		// The result of a call is not traced any further.

	case ssa.Instruction:
		// Look through instructions with a single operand.
		rands := v.Operands(nil)
		if len(rands) == 1 && rands[0] != nil {
			x.visitFormatsValue(call, *rands[0], seen)
		}
	}
}

// Note: a function may have an argument marked as both format and passthrough.

// visitArgs collects information on arguments. For wrapped functions it will
// just determine the position of the variable args slice.
//
// When v originates from a parameter of the enclosing function, the index of
// that parameter is stored in fd.argPos. Each SSA value is processed at most
// once per call, so cyclic value graphs cannot cause unbounded recursion.
func (x *extracter) visitArgs(fd *callData, v ssa.Value) {
	x.visitArgsValue(fd, v, map[ssa.Value]bool{})
}

// visitArgsValue is the recursive worker behind visitArgs. seen holds the
// values that were already processed during the current walk.
func (x *extracter) visitArgsValue(fd *callData, v ssa.Value, seen map[ssa.Value]bool) {
	if v == nil || seen[v] {
		return
	}
	seen[v] = true
	x.debug(v, "ARGV", v)

	switch v := v.(type) {
	case *ssa.Slice:
		if v.Low == nil && v.High == nil && v.Max == nil {
			x.visitArgsValue(fd, v.X, seen)
		}

	case *ssa.Parameter:
		// TODO: handle the function for the index parameter.
		f := v.Parent()
		for i, p := range f.Params {
			if p == v {
				fd.argPos = i
			}
		}

	case *ssa.Alloc:
		// Follow the instructions that refer to the allocation (for example
		// the stores into it) when referrer information is available.
		refs := v.Referrers()
		if refs == nil {
			break
		}
		for _, r := range *refs {
			values := []ssa.Value{}
			for _, o := range r.Operands(nil) {
				if o == nil || *o == v {
					continue
				}
				values = append(values, *o)
			}
			// TODO: return something different if we care about
			// multiple values as well.
			if len(values) == 1 {
				x.visitArgsValue(fd, values[0], seen)
			}
		}

	case ssa.Instruction:
		// Look through instructions with a single operand.
		rands := v.Operands(nil)
		if len(rands) == 1 && rands[0] != nil {
			x.visitArgsValue(fd, *rands[0], seen)
		}
	}
}

// print returns Go syntax for the specified node.
//
// The error reported by format.Node is discarded; whatever was written to the
// buffer is returned.
func (x *extracter) print(n ast.Node) string {
	buf := new(bytes.Buffer)
	_ = format.Node(buf, x.conf.Fset, n)
	return buf.String()
}

// packageExtracter bundles one source file with the extracter, the
// information of the package the file belongs to, and the comment map built
// for the file.
type packageExtracter struct {
	f    *ast.File
	x    *extracter
	info *loader.PackageInfo
	cmap ast.CommentMap
}

// getComment returns the whitespace-trimmed text of the first comment group
// the file's comment map associates with n, or "" when there is none.
func (px packageExtracter) getComment(n ast.Node) string {
	groups := px.cmap.Filter(n).Comments()
	if len(groups) == 0 {
		return ""
	}
	return strings.TrimSpace(groups[0].Text())
}

// extractMessages turns the collected call and global data into messages.
//
// It makes two passes over every file of every loaded package: the first
// links each recorded call site to its AST call expression, the second hands
// call expressions and value specs to handleCall and handleGlobal.
func (x *extracter) extractMessages() {
	var files []packageExtracter
	for _, info := range x.iprog.AllPackages {
		for _, file := range info.Files {
			files = append(files, packageExtracter{
				f:    file,
				x:    x,
				info: info,
				// Associate comments with nodes.
				cmap: ast.NewCommentMap(x.iprog.Fset, file, file.Comments),
			})
		}
	}

	// Pass 1: attach the AST expression to every known call site.
	for _, px := range files {
		ast.Inspect(px.f, func(n ast.Node) bool {
			if call, ok := n.(*ast.CallExpr); ok {
				if d := x.funcs[call.Lparen]; d != nil {
					d.expr = call
				}
			}
			return true
		})
	}

	// Pass 2: emit messages for calls and globals.
	for _, px := range files {
		ast.Inspect(px.f, func(n ast.Node) bool {
			if call, ok := n.(*ast.CallExpr); ok {
				return px.handleCall(call)
			}
			if spec, ok := n.(*ast.ValueSpec); ok {
				return px.handleGlobal(spec)
			}
			return true
		})
	}
}

// handleGlobal adds a message for every constant recorded for the names
// declared in spec. A name is considered when it was used as a format string
// in a call or when it starts with "msg" or "Msg". It always returns true so
// that the AST traversal continues.
func (px packageExtracter) handleGlobal(spec *ast.ValueSpec) bool {
	comment := px.getComment(spec)

	for _, ident := range spec.Names {
		data, found := px.x.globals[ident.Pos()]
		if !found {
			continue
		}

		id := ident.Name
		var args []argument
		switch {
		case data.call != nil:
			args = px.getArguments(data.call)
		case strings.HasPrefix(id, "msg") || strings.HasPrefix(id, "Msg"):
			// Accepted by naming convention; there are no arguments.
		default:
			continue
		}

		data.visit(px.x, func(c constant.Value) {
			px.addMessage(spec.Pos(), []string{id}, c, comment, args)
		})
	}

	return true
}

// handleCall adds a message for every format string recorded for the call
// site of call. Calls without recorded formats are ignored. It always returns
// true so that the AST traversal continues.
//
// It panics when the recorded AST expression of the call site is not call,
// which indicates a broken internal invariant.
func (px packageExtracter) handleCall(call *ast.CallExpr) bool {
	x := px.x
	data := x.funcs[call.Lparen]
	if data == nil || len(data.formats) == 0 {
		return true
	}
	if data.expr != call {
		panic("invariant `data.expr == call` failed")
	}
	x.debug(data.call, "INSERT", data.formats)

	argn := data.callFormatPos()
	if argn < 0 || argn >= len(call.Args) {
		return true
	}
	format := call.Args[argn]

	arguments := px.getArguments(data)

	comment := ""
	key := []string{}
	if ident, ok := format.(*ast.Ident); ok {
		key = append(key, ident.Name)
		// Obj is documented as "denoted object; or nil", so it must be
		// checked before its declaration is inspected.
		if ident.Obj != nil {
			if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
				// TODO: get comment above ValueSpec as well
				comment = v.Comment.Text()
			}
		}
	}
	// A comment attached to the first argument takes precedence.
	if c := px.getComment(call.Args[0]); c != "" {
		comment = c
	}

	for _, c := range data.formats {
		px.addMessage(call.Lparen, key, c, comment, arguments)
	}
	return true
}

// getArguments describes the arguments that follow the format string in the
// call expression of data. The result is empty, but never nil, when the call
// has no usable argument position, when no AST expression was linked to the
// call, or when the start position lies beyond the argument list.
//
// Type information is looked up in px.info. An argument for which px.info
// has no type recorded gets empty Type and UnderlyingType strings.
func (px packageExtracter) getArguments(data *callData) []argument {
	arguments := []argument{}
	x := px.x
	info := px.info

	start := data.callArgsStart()
	if start < 0 || data.expr == nil || start > len(data.expr.Args) {
		return arguments
	}

	for i, arg := range data.expr.Args[start:] {
		tv := info.Types[arg]

		expr := x.print(arg)
		val := ""
		if tv.Value != nil {
			val = tv.Value.ExactString()
			// For constant expressions the computed value is more useful
			// than the source text.
			switch arg.(type) {
			case *ast.BinaryExpr, *ast.UnaryExpr:
				expr = val
			}
		}

		typ, underlying := "", ""
		if tv.Type != nil {
			typ = tv.Type.String()
			underlying = tv.Type.Underlying().String()
		}

		arguments = append(arguments, argument{
			ArgNum:         i + 1,
			Type:           typ,
			UnderlyingType: underlying,
			Expr:           expr,
			Value:          val,
			Comment:        px.getComment(arg),
			Position:       posString(&x.conf, info.Pkg, arg.Pos()),
			// TODO report whether it implements
			// interfaces plural.Interface,
			// gender.Interface.
		})
	}
	return arguments
}

// addMessage parses the format string c and appends the resulting Message to
// the extracter.
//
// Every substitution in the whitespace-trimmed format string is replaced by a
// "{Placeholder}" reference in the message text. Arguments the format string
// does not use are added as extra placeholders so that translations can
// still refer to them. pos is the position reported for the message, key the
// leading components of its ID and comment its description.
//
// The arguments slice is not modified: the bookkeeping of which arguments
// were used is done on a private copy, so it cannot leak from one format
// string to the next when the same slice is passed repeatedly.
func (px packageExtracter) addMessage(
	pos token.Pos,
	key []string,
	c constant.Value,
	comment string,
	arguments []argument) {
	x := px.x
	fmtMsg := constant.StringVal(c)

	args := make([]argument, len(arguments))
	copy(args, arguments)
	for i := range args {
		args[i].used = false
	}

	ph := placeholders{index: map[string]string{}}

	trimmed, _, _ := trimWS(fmtMsg)

	p := fmtparser.Parser{}
	simArgs := make([]interface{}, len(args))
	for i, v := range args {
		simArgs[i] = v
	}
	var msg strings.Builder
	p.Reset(simArgs)
	for p.SetFormat(trimmed); p.Scan(); {
		name := ""
		var arg *argument
		switch p.Status {
		case fmtparser.StatusText:
			msg.WriteString(p.Text())
			continue
		case fmtparser.StatusSubstitution,
			fmtparser.StatusBadWidthSubstitution,
			fmtparser.StatusBadPrecSubstitution:
			arg = &args[p.ArgNum-1]
			arg.used = true
			name = getID(arg)
		case fmtparser.StatusBadArgNum, fmtparser.StatusMissingArg:
			arg = &argument{
				ArgNum:   p.ArgNum,
				Position: posString(&x.conf, px.info.Pkg, pos),
			}
			name, arg.UnderlyingType = verbToPlaceholder(p.Text(), p.ArgNum)
		default:
			// Any other parser status provides no argument to describe.
			// Keep the text verbatim rather than passing a nil argument
			// on to addArg.
			msg.WriteString(p.Text())
			continue
		}
		sub := p.Text()
		if !p.HasIndex {
			// Make the argument index explicit, e.g. "%d" becomes "%[1]d".
			r, sz := utf8.DecodeLastRuneInString(sub)
			sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
		}
		msg.WriteString(fmt.Sprintf("{%s}", ph.addArg(arg, name, sub)))
	}
	text := msg.String()
	key = append(key, text)

	// Add additional Placeholders that can be used in translations
	// that are not present in the string.
	for i := range args {
		arg := &args[i]
		if arg.used {
			continue
		}
		ph.addArg(arg, getID(arg), fmt.Sprintf("%%[%d]v", arg.ArgNum))
	}

	x.messages = append(x.messages, Message{
		ID:      key,
		Key:     fmtMsg,
		Message: Text{Msg: text},
		// TODO(fix): this doesn't get the before comment.
		Comment:      comment,
		Placeholders: ph.slice,
		Position:     posString(&x.conf, px.info.Pkg, pos),
	})
}

// posString formats pos as "<package path>/<file base name>:<line>:<column>",
// resolving the position through the file set of conf.
func posString(conf *loader.Config, pkg *types.Package, pos token.Pos) string {
	position := conf.Fset.Position(pos)
	base := filepath.Base(position.Filename)
	location := fmt.Sprintf("%s:%d:%d", base, position.Line, position.Column)
	return filepath.Join(pkg.Path(), location)
}

// getID derives a placeholder ID from an argument: the last dot-separated
// component of its expression, cleaned up by strip, with spaces removed and
// title-cased. When that yields at most two bytes and the argument has a
// named type that differs from its underlying type, the last component of
// the type name is used instead.
func getID(arg *argument) string {
	id := strings.Replace(strip(getLastComponent(arg.Expr)), " ", "", -1)
	// For small variable names, use user-defined types for more info.
	if len(id) <= 2 && arg.Type != arg.UnderlyingType {
		id = getLastComponent(arg.Type)
	}
	return strings.Title(id)
}

// strip is a dirty hack to convert function calls to placeholder IDs.
//
// Whitespace and '-' are replaced by '_', and every other rune that is not a
// letter, mark or number is dropped. A leading "Get" or "get" is then removed
// when it is followed by a rune that is neither a lowercase letter nor a
// mark, so "GetName" and "getName" become "Name" while "Getter" and a bare
// "Get" are left alone.
func strip(s string) string {
	s = strings.Map(func(r rune) rune {
		if unicode.IsSpace(r) || r == '-' {
			return '_'
		}
		if !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number) {
			return -1
		}
		return r
	}, s)

	// Strip "Get" from getter functions.
	const prefixLen = len("get")
	if len(s) > prefixLen && (strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get")) {
		// Look at the rune that follows the prefix: a lowercase letter or a
		// mark means the prefix is part of a longer word.
		r, _ := utf8.DecodeRuneInString(s[prefixLen:])
		if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark
			s = s[prefixLen:]
		}
	}
	return s
}

// verbToPlaceholder gives a name for a placeholder based on the substitution
// verb. This is only to be used if there is otherwise no other type information
// available.
//
// The verb is the last rune of sub. For verbs without a dedicated name the
// placeholder is called "Arg_<pos>".
func verbToPlaceholder(sub string, pos int) (name, underlying string) {
	verb, _ := utf8.DecodeLastRuneInString(sub)

	switch verb {
	case 'd':
		return "Integer", "int"
	case 'e', 'f', 'g':
		return "Number", "float64"
	case 'm':
		return "Message", "string"
	}

	name = fmt.Sprintf("Arg_%d", pos)
	if verb == 's' || verb == 'q' {
		return name, "string"
	}
	return name, "interface{}"
}

// placeholders accumulates the placeholders of a single message.
type placeholders struct {
	// index maps a placeholder ID to the substitution string registered
	// under it.
	index map[string]string
	// slice holds the placeholders in the order they were added.
	slice []Placeholder
}

// addArg registers a placeholder for arg and returns the ID it was stored
// under. The ID is name, unless name is already taken by a different
// substitution string, in which case "<name>_1", "<name>_2", ... are tried
// until an ID is found that is free or already maps to sub.
func (p *placeholders) addArg(arg *argument, name, sub string) (id string) {
	id = name
	for n := 1; ; n++ {
		existing, taken := p.index[id]
		if !taken || existing == sub {
			break
		}
		id = fmt.Sprintf("%s_%d", name, n)
	}

	p.index[id] = sub
	entry := Placeholder{
		ID:             id,
		String:         sub,
		Type:           arg.Type,
		UnderlyingType: arg.UnderlyingType,
		ArgNum:         arg.ArgNum,
		Expr:           arg.Expr,
		Comment:        arg.Comment,
	}
	p.slice = append(p.slice, entry)
	return id
}

// getLastComponent returns the part of s after its last '.', or s itself
// when it contains no '.'.
func getLastComponent(s string) string {
	if dot := strings.LastIndexByte(s, '.'); dot >= 0 {
		return s[dot+1:]
	}
	return s
}

// isMsg returns whether s should be translated, which is the case when it
// contains at least one letter.
func isMsg(s string) bool {
	// TODO: parse as format string and omit strings that contain letters
	// coming from format verbs.
	return strings.IndexFunc(s, unicode.IsLetter) >= 0
}