// Path: minecraft-ui/vendor/golang.org/x/text/message/pipeline/extract.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pipeline

import (
	"bytes"
	"fmt"
	"go/ast"
	"go/constant"
	"go/format"
	"go/token"
	"go/types"
	"path"
	"path/filepath"
	"strings"
	"unicode"
	"unicode/utf8"

	fmtparser "golang.org/x/text/internal/format"
	"golang.org/x/tools/go/loader"
)

// TODO:
// - merge information into existing files
// - handle different file formats (PO, XLIFF)
// - handle features (gender, plural)
// - message rewriting

// - %m substitutions
// - `msg:"etc"` tags
// - msg/Msg top-level vars and strings.

// Extract extracts all strings from the packages listed in Config.
//
// It loads c.Packages, walks every file of every loaded package and records a
// Message for each call to one of the methods listed in extractFuncs whose
// extracted argument is a constant string containing at least one letter.
// The returned State carries a copy of c, the loaded program and the
// extracted messages. An error is returned when the packages cannot be loaded.
func Extract(c *Config) (*State, error) {
	conf := loader.Config{}
	prog, err := loadPackages(&conf, c.Packages)
	if err != nil {
		return nil, wrap(err, "")
	}

	// print returns Go syntax for the specified node. The error from
	// format.Node is not checked; whatever was written to the buffer is
	// returned as is.
	print := func(n ast.Node) string {
		var buf bytes.Buffer
		format.Node(&buf, conf.Fset, n)
		return buf.String()
	}

	var messages []Message

	for _, info := range prog.AllPackages {
		for _, f := range info.Files {
			// Associate comments with nodes.
			cmap := ast.NewCommentMap(prog.Fset, f, f.Comments)
			// getComment returns the trimmed text of the first comment group
			// associated with n, or "" if there is none.
			getComment := func(n ast.Node) string {
				cs := cmap.Filter(n).Comments()
				if len(cs) > 0 {
					return strings.TrimSpace(cs[0].Text())
				}
				return ""
			}

			// Find function calls.
			ast.Inspect(f, func(n ast.Node) bool {
				call, ok := n.(*ast.CallExpr)
				if !ok {
					return true
				}

				// Skip calls of functions other than
				// (*message.Printer).{Sp,Fp,P}rintf.
				sel, ok := call.Fun.(*ast.SelectorExpr)
				if !ok {
					return true
				}
				meth := info.Selections[sel]
				if meth == nil || meth.Kind() != types.MethodVal {
					return true
				}
				// TODO: remove cheap hack and check if the type either
				// implements some interface or is specifically of type
				// "golang.org/x/text/message".Printer.
				m, ok := extractFuncs[path.Base(meth.Recv().String())]
				if !ok {
					return true
				}

				fmtType, ok := m[meth.Obj().Name()]
				if !ok {
					return true
				}
				// argn is the index of the format string.
				argn := fmtType.arg
				if argn >= len(call.Args) {
					return true
				}

				// args[0] is the format string; the rest are its operands.
				args := call.Args[argn:]

				fmtMsg, ok := msgStr(info, args[0])
				if !ok {
					// TODO: identify the type of the format argument. If it
					// is not a string, multiple keys may be defined.
					return true
				}
				comment := ""
				key := []string{}
				if ident, ok := args[0].(*ast.Ident); ok {
					key = append(key, ident.Name)
					// ident.Obj is nil when the parser could not resolve the
					// identifier within its own file, so it must be checked
					// before looking at the declaration.
					if ident.Obj != nil {
						if v, ok := ident.Obj.Decl.(*ast.ValueSpec); ok && v.Comment != nil {
							// TODO: get comment above ValueSpec as well
							comment = v.Comment.Text()
						}
					}
				}

				arguments := []argument{}
				args = args[1:]
				// simArgs stands in for the runtime operands when parsing the
				// format string; only constant operands get a value.
				simArgs := make([]interface{}, len(args))
				for i, arg := range args {
					expr := print(arg)
					val := ""
					if v := info.Types[arg].Value; v != nil {
						val = v.ExactString()
						simArgs[i] = val
						// Constant unary and binary expressions are
						// represented by their computed value.
						switch arg.(type) {
						case *ast.BinaryExpr, *ast.UnaryExpr:
							expr = val
						}
					}
					arguments = append(arguments, argument{
						ArgNum:         i + 1,
						Type:           info.Types[arg].Type.String(),
						UnderlyingType: info.Types[arg].Type.Underlying().String(),
						Expr:           expr,
						Value:          val,
						Comment:        getComment(arg),
						Position:       posString(conf, info, arg.Pos()),
						// TODO report whether it implements
						// interfaces plural.Interface,
						// gender.Interface.
					})
				}
				msg := ""

				ph := placeholders{index: map[string]string{}}

				trimmed, _, _ := trimWS(fmtMsg)

				// Rebuild the message, replacing each substitution in the
				// format string by a "{PlaceholderID}" reference.
				p := fmtparser.Parser{}
				p.Reset(simArgs)
				for p.SetFormat(trimmed); p.Scan(); {
					switch p.Status {
					case fmtparser.StatusText:
						msg += p.Text()
					case fmtparser.StatusSubstitution,
						fmtparser.StatusBadWidthSubstitution,
						fmtparser.StatusBadPrecSubstitution:
						arguments[p.ArgNum-1].used = true
						arg := arguments[p.ArgNum-1]
						sub := p.Text()
						if !p.HasIndex {
							// Insert an explicit argument index before the
							// final rune of the substitution.
							r, sz := utf8.DecodeLastRuneInString(sub)
							sub = fmt.Sprintf("%s[%d]%c", sub[:len(sub)-sz], p.ArgNum, r)
						}
						msg += fmt.Sprintf("{%s}", ph.addArg(&arg, sub))
					}
				}
				key = append(key, msg)

				// Add additional Placeholders that can be used in translations
				// that are not present in the string.
				for _, arg := range arguments {
					if arg.used {
						continue
					}
					ph.addArg(&arg, fmt.Sprintf("%%[%d]v", arg.ArgNum))
				}

				// NOTE(review): this looks at the first call argument, which
				// for Fprintf (arg: 1 in extractFuncs) is not the format
				// string - confirm this is intended.
				if c := getComment(call.Args[0]); c != "" {
					comment = c
				}

				messages = append(messages, Message{
					ID:      key,
					Key:     fmtMsg,
					Message: Text{Msg: msg},
					// TODO(fix): this doesn't get the before comment.
					Comment:      comment,
					Placeholders: ph.slice,
					Position:     posString(conf, info, call.Lparen),
				})
				return true
			})
		}
	}

	return &State{
		Config:  *c,
		program: prog,
		Extracted: Messages{
			Language: c.SourceLanguage,
			Messages: messages,
		},
	}, nil
}

// posString renders pos as "<package path>/<file base name>:<line>:<column>",
// resolving the position through conf.Fset and taking the package path from
// info.
func posString(conf loader.Config, info *loader.PackageInfo, pos token.Pos) string {
	where := conf.Fset.Position(pos)
	base := filepath.Base(where.Filename)
	return filepath.Join(
		info.Pkg.Path(),
		fmt.Sprintf("%s:%d:%d", base, where.Line, where.Column),
	)
}

// extractFuncs lists, per receiver type (keyed as "package.Type"), the methods
// from whose calls strings are extracted, together with the argument that
// holds the string.
// TODO: use the types in conf.Import("golang.org/x/text/message") to extract
// the correct instances.
var extractFuncs = map[string]map[string]extractType{
	// TODO: Printer -> *golang.org/x/text/message.Printer
	"message.Printer": {
		// Methods marked as taking a format string.
		"Printf":  {arg: 0, format: true},
		"Sprintf": {arg: 0, format: true},
		"Fprintf": {arg: 1, format: true},

		// Lookup is not marked as taking a format string.
		"Lookup": {arg: 0},
	},
}

// extractType describes how a string is extracted from a call to one of the
// methods listed in extractFuncs.
type extractType struct {
	// format indicates if the next arg is a formatted string or whether to
	// concatenate all arguments.
	// NOTE(review): the Extract function in this file reads only arg, not
	// this field - confirm whether it is consulted elsewhere.
	format bool
	// arg indicates the position of the argument to extract, as a zero-based
	// index into the call's argument list.
	arg int
}

// getID derives a placeholder ID for arg: the last dot-separated component of
// its expression, cleaned up by strip and title-cased. When that name is at
// most two bytes long and arg has a named type distinct from its underlying
// type, the last component of the type name is used instead.
func getID(arg *argument) string {
	name := strings.Replace(strip(getLastComponent(arg.Expr)), " ", "", -1)
	// For small variable names, use user-defined types for more info.
	if arg.UnderlyingType != arg.Type && len(name) <= 2 {
		name = getLastComponent(arg.Type)
	}
	return strings.Title(name)
}

// strip is a dirty hack to convert function calls to placeholder IDs.
//
// Whitespace and '-' are replaced by '_', and every other rune that is not a
// letter, mark or number is dropped. A leading "Get" or "get" is then removed
// when the rune following it is not a lowercase letter or a mark, so getter
// names such as "GetName" yield "Name" while words such as "Getter" are left
// untouched.
func strip(s string) string {
	s = strings.Map(func(r rune) rune {
		switch {
		case unicode.IsSpace(r) || r == '-':
			return '_'
		case !unicode.In(r, unicode.Letter, unicode.Mark, unicode.Number):
			return -1 // drop the rune
		}
		return r
	}, s)
	// Strip "Get" from getter functions.
	if strings.HasPrefix(s, "Get") || strings.HasPrefix(s, "get") {
		rest := s[len("get"):]
		if rest != "" {
			// Decide on the rune that follows the prefix; the first rune of
			// s is always 'G' or 'g' here and says nothing about whether
			// the prefix is a separate word.
			r, _ := utf8.DecodeRuneInString(rest)
			if !unicode.In(r, unicode.Ll, unicode.M) { // not lower or mark
				s = rest
			}
		}
	}
	return s
}

// placeholders collects the placeholders registered through addArg.
type placeholders struct {
	// index maps a placeholder ID to the substitution string recorded for it.
	index map[string]string
	// slice holds the placeholders in the order in which they were added.
	slice []Placeholder
}

// addArg registers a placeholder for arg with substitution string sub and
// returns the ID under which it was recorded. The ID starts out as getID(arg);
// while that ID is already taken by a different substitution, the suffixes
// "_1", "_2", ... are tried in turn. A Placeholder is appended to p.slice on
// every call.
func (p *placeholders) addArg(arg *argument, sub string) (id string) {
	base := getID(arg)
	id = base
	for n := 1; ; n++ {
		prev, taken := p.index[id]
		if !taken || prev == sub {
			break
		}
		id = fmt.Sprintf("%s_%d", base, n)
	}
	p.index[id] = sub

	ph := Placeholder{
		ID:             id,
		String:         sub,
		Type:           arg.Type,
		UnderlyingType: arg.UnderlyingType,
		ArgNum:         arg.ArgNum,
		Expr:           arg.Expr,
		Comment:        arg.Comment,
	}
	p.slice = append(p.slice, ph)
	return id
}

// getLastComponent returns the part of s that follows its last '.', or s
// itself when it contains no '.'.
func getLastComponent(s string) string {
	if dot := strings.LastIndexByte(s, '.'); dot >= 0 {
		return s[dot+1:]
	}
	return s
}

// msgStr returns the constant string value of e as recorded in info. ok is
// false when e is not a constant string or when the string contains no
// letter at all.
func msgStr(info *loader.PackageInfo, e ast.Expr) (s string, ok bool) {
	val := info.Types[e].Value
	if val == nil || val.Kind() != constant.String {
		return "", false
	}
	text := constant.StringVal(val)
	// Only record strings with letters.
	for _, r := range text {
		if unicode.IsLetter(r) {
			return text, true
		}
	}
	return "", false
}