garble/main.go

// Copyright (c) 2019, The Garble Authors.
// See LICENSE for licensing information.

// garble obfuscates Go code by wrapping the Go toolchain.
package main

import (
	"bufio"
	"bytes"
	"cmp"
	cryptorand "crypto/rand"
	"encoding/base64"
	"encoding/binary"
	"encoding/gob"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"go/ast"
	"go/importer"
	"go/parser"
	"go/token"
	"go/types"
	"go/version"
	"io"
	"io/fs"
	"log"
	mathrand "math/rand"
	"os"
	"os/exec"
	"path/filepath"
	"regexp"
	"runtime"
	"runtime/debug"
	"strconv"
	"strings"
	"time"
	"unicode"
	"unicode/utf8"

	"github.com/rogpeppe/go-internal/cache"
	"golang.org/x/exp/maps"
	"golang.org/x/exp/slices"
	"golang.org/x/mod/module"
	"golang.org/x/tools/go/ast/astutil"
	"golang.org/x/tools/go/ssa"
	"mvdan.cc/garble/internal/ctrlflow"

	"mvdan.cc/garble/internal/linker"
	"mvdan.cc/garble/internal/literals"
)

var flagSet = flag.NewFlagSet("garble", flag.ContinueOnError)

var (
	flagLiterals bool
	flagTiny     bool
	flagDebug    bool
	flagDebugDir string
	flagSeed     seedFlag
	// TODO(pagran): in the future, when control flow obfuscation will be stable migrate to flag
	flagControlFlow = os.Getenv("GARBLE_EXPERIMENTAL_CONTROLFLOW") == "1"
)

func init() {
	flagSet.Usage = usage
	flagSet.BoolVar(&flagLiterals, "literals", false, "Obfuscate literals such as strings")
	flagSet.BoolVar(&flagTiny, "tiny", false, "Optimize for binary size, losing some ability to reverse the process")
	flagSet.BoolVar(&flagDebug, "debug", false, "Print debug logs to stderr")
	flagSet.StringVar(&flagDebugDir, "debugdir", "", "Write the obfuscated source to a directory, e.g. -debugdir=out")
	flagSet.Var(&flagSeed, "seed", "Provide a base64-encoded seed, e.g. -seed=o9WDTZ4CN4w\nFor a random seed, provide -seed=random")
}

var rxGarbleFlag = regexp.MustCompile(`-(?:literals|tiny|debug|debugdir|seed)(?:$|=)`)

type seedFlag struct {
	random bool
	bytes  []byte
}

func (f seedFlag) present() bool { return len(f.bytes) > 0 }

func (f seedFlag) String() string {
	return base64.RawStdEncoding.EncodeToString(f.bytes)
}

func (f *seedFlag) Set(s string) error {
	if s == "random" {
		f.random = true // to show the random seed we chose

		f.bytes = make([]byte, 16) // random 128 bit seed
		if _, err := cryptorand.Read(f.bytes); err != nil {
			return fmt.Errorf("error generating random seed: %v", err)
		}
	} else {
		// We expect unpadded base64, but to be nice, accept padded
		// strings too.
		s = strings.TrimRight(s, "=")
		seed, err := base64.RawStdEncoding.DecodeString(s)
		if err != nil {
			return fmt.Errorf("error decoding seed: %v", err)
		}

		// TODO: Note that we always use 8 bytes; any bytes after that are
		// entirely ignored. That may be confusing to the end user.
		if len(seed) < 8 {
			return fmt.Errorf("-seed needs at least 8 bytes, have %d", len(seed))
		}
		f.bytes = seed
	}
	return nil
}

func usage() {
	fmt.Fprintf(os.Stderr, `
Garble obfuscates Go code by wrapping the Go toolchain.

	garble [garble flags] command [go flags] [go arguments]

For example, to build an obfuscated program:

	garble build ./cmd/foo

Similarly, to combine garble flags and Go build flags:

	garble -literals build -tags=purego ./cmd/foo

The following commands are supported:

	build          replace "go build"
	test           replace "go test"
	run            replace "go run"
	reverse        de-obfuscate output such as stack traces
	version        print the version and build settings of the garble binary

To learn more about a command, run "garble help <command>".

garble accepts the following flags before a command:

`[1:])
	flagSet.PrintDefaults()
	fmt.Fprintf(os.Stderr, `

For more information, see https://github.com/burrowers/garble.
`[1:])
}

func main() { os.Exit(main1()) }

var (
	// Presumably OK to share fset across packages.
	fset = token.NewFileSet()

	sharedTempDir = os.Getenv("GARBLE_SHARED")
	parentWorkDir = os.Getenv("GARBLE_PARENT_WORK")
)

const actionGraphFileName = "action-graph.json"

type importerWithMap struct {
	importMap  map[string]string
	importFrom func(path, dir string, mode types.ImportMode) (*types.Package, error)
}

func (im importerWithMap) Import(path string) (*types.Package, error) {
	panic("should never be called")
}

func (im importerWithMap) ImportFrom(path, dir string, mode types.ImportMode) (*types.Package, error) {
	if path2 := im.importMap[path]; path2 != "" {
		path = path2
	}
	return im.importFrom(path, dir, mode)
}

func importerForPkg(lpkg *listedPackage) importerWithMap {
	return importerWithMap{
		importFrom: importer.ForCompiler(fset, "gc", func(path string) (io.ReadCloser, error) {
			pkg, err := listPackage(lpkg, path)
			if err != nil {
				return nil, err
			}
			return os.Open(pkg.Export)
		}).(types.ImporterFrom).ImportFrom,
		importMap: lpkg.ImportMap,
	}
}

// uniqueLineWriter sits underneath log.SetOutput to deduplicate log lines.
// We log bits of useful information for debugging,
// and logging the same detail twice is not going to help the user.
// Duplicates are relatively normal, given that names tend to repeat.
type uniqueLineWriter struct {
	out  io.Writer
	seen map[string]bool
}

func (w *uniqueLineWriter) Write(p []byte) (n int, err error) {
	if !flagDebug {
		panic("unexpected use of uniqueLineWriter with -debug unset")
	}
	if bytes.Count(p, []byte("\n")) != 1 {
		return 0, fmt.Errorf("log write wasn't just one line: %q", p)
	}
	if w.seen[string(p)] {
		return len(p), nil
	}
	if w.seen == nil {
		w.seen = make(map[string]bool)
	}
	w.seen[string(p)] = true
	return w.out.Write(p)
}

// debugSince is like time.Since but resulting in shorter output.
// A build process takes at least hundreds of milliseconds,
// so extra decimal points in the order of microseconds aren't meaningful.
func debugSince(start time.Time) time.Duration {
	return time.Since(start).Truncate(10 * time.Microsecond)
}

func main1() int {
	defer func() {
		if os.Getenv("GARBLE_WRITE_ALLOCS") != "true" {
			return
		}
		var memStats runtime.MemStats
		runtime.ReadMemStats(&memStats)
		fmt.Fprintf(os.Stderr, "garble allocs: %d\n", memStats.Mallocs)
	}()
	if err := flagSet.Parse(os.Args[1:]); err != nil {
		return 2
	}
	log.SetPrefix("[garble] ")
	log.SetFlags(0) // no timestamps, as they aren't very useful
	if flagDebug {
		// TODO: cover this in the tests.
		log.SetOutput(&uniqueLineWriter{out: os.Stderr})
	} else {
		log.SetOutput(io.Discard)
	}
	args := flagSet.Args()
	if len(args) < 1 {
		usage()
		return 2
	}

	// If a random seed was used, the user won't be able to reproduce the
	// same output or failure unless we print the random seed we chose.
	// If the build failed and a random seed was used,
	// the failure might not reproduce with a different seed.
	// Print it before we exit.
	if flagSeed.random {
		fmt.Fprintf(os.Stderr, "-seed chosen at random: %s\n", base64.RawStdEncoding.EncodeToString(flagSeed.bytes))
	}
	if err := mainErr(args); err != nil {
		if code, ok := err.(errJustExit); ok {
			return int(code)
		}
		fmt.Fprintln(os.Stderr, err)
		return 1
	}
	return 0
}

type errJustExit int

func (e errJustExit) Error() string { return fmt.Sprintf("exit: %d", e) }

func goVersionOK() bool {
	const (
		minGoVersion = "go1.22" // the first major version we support
		maxGoVersion = "go1.23" // the first major version we don't support
	)

	// rxVersion looks for a version like "go1.2" or "go1.2.3" in `go env GOVERSION`.
	rxVersion := regexp.MustCompile(`go\d+\.\d+(?:\.\d+)?`)

	toolchainVersionFull := sharedCache.GoEnv.GOVERSION
	sharedCache.GoVersion = rxVersion.FindString(toolchainVersionFull)
	if sharedCache.GoVersion == "" {
		// Go 1.15.x and older did not have GOVERSION yet; they are too old anyway.
		fmt.Fprintf(os.Stderr, "Go version is too old; please upgrade to %s or newer\n", minGoVersion)
		return false
	}

	if version.Compare(sharedCache.GoVersion, minGoVersion) < 0 {
		fmt.Fprintf(os.Stderr, "Go version %q is too old; please upgrade to %s or newer\n", toolchainVersionFull, minGoVersion)
		return false
	}
	if version.Compare(sharedCache.GoVersion, maxGoVersion) >= 0 {
		fmt.Fprintf(os.Stderr, "Go version %q is too new; Go linker patches aren't available for %s or later yet\n", toolchainVersionFull, maxGoVersion)
		return false
	}

	// Ensure that the version of Go that built the garble binary is equal or
	// newer than cache.GoVersionSemver.
	builtVersionFull := cmp.Or(os.Getenv("GARBLE_TEST_GOVERSION"), runtime.Version())
	builtVersion := rxVersion.FindString(builtVersionFull)
	if builtVersion == "" {
		// If garble built itself, we don't know what Go version was used.
		// Fall back to not performing the check against the toolchain version.
		return true
	}
	if version.Compare(builtVersion, sharedCache.GoVersion) < 0 {
		fmt.Fprintf(os.Stderr, `
garble was built with %q and can't be used with the newer %q; rebuild it with a command like:
    go install mvdan.cc/garble@latest
`[1:], builtVersionFull, toolchainVersionFull)
		return false
	}

	return true
}

func mainErr(args []string) error {
	command, args := args[0], args[1:]

	// Catch users reaching for `go build -toolexec=garble`.
	if command != "toolexec" && len(args) == 1 && args[0] == "-V=full" {
		return fmt.Errorf(`did you run "go [command] -toolexec=garble" instead of "garble [command]"?`)
	}

	switch command {
	case "help":
		if hasHelpFlag(args) || len(args) > 1 {
			fmt.Fprintf(os.Stderr, "usage: garble help [command]\n")
			return errJustExit(2)
		}
		if len(args) == 1 {
			return mainErr([]string{args[0], "-h"})
		}
		usage()
		return errJustExit(2)
	case "version":
		if hasHelpFlag(args) || len(args) > 0 {
			fmt.Fprintf(os.Stderr, "usage: garble version\n")
			return errJustExit(2)
		}
		info, ok := debug.ReadBuildInfo()
		if !ok {
			// The build binary was stripped of build info?
			// Could be the case if garble built itself.
			fmt.Println("unknown")
			return nil
		}
		mod := &info.Main
		if mod.Replace != nil {
			mod = mod.Replace
		}

		// For the tests.
		if v := os.Getenv("GARBLE_TEST_BUILDSETTINGS"); v != "" {
			var extra []debug.BuildSetting
			if err := json.Unmarshal([]byte(v), &extra); err != nil {
				return err
			}
			info.Settings = append(info.Settings, extra...)
		}

		// Until https://github.com/golang/go/issues/50603 is implemented,
		// manually construct something like a pseudo-version.
		// TODO: remove when this code is dead, hopefully in Go 1.22.
		if mod.Version == "(devel)" {
			var vcsTime time.Time
			var vcsRevision string
			for _, setting := range info.Settings {
				switch setting.Key {
				case "vcs.time":
					// If the format is invalid, we'll print a zero timestamp.
					vcsTime, _ = time.Parse(time.RFC3339Nano, setting.Value)
				case "vcs.revision":
					vcsRevision = setting.Value
					if len(vcsRevision) > 12 {
						vcsRevision = vcsRevision[:12]
					}
				}
			}
			if vcsRevision != "" {
				mod.Version = module.PseudoVersion("", "", vcsTime, vcsRevision)
			}
		}

		fmt.Printf("%s %s\n\n", mod.Path, mod.Version)
		fmt.Printf("Build settings:\n")
		for _, setting := range info.Settings {
			if setting.Value == "" {
				continue // do empty build settings even matter?
			}
			// The padding helps keep readability by aligning:
			//
			//   veryverylong.key value
			//          short.key some-other-value
			//
			// Empirically, 16 is enough; the longest key seen is "vcs.revision".
			fmt.Printf("%16s %s\n", setting.Key, setting.Value)
		}
		return nil
	case "reverse":
		return commandReverse(args)
	case "build", "test", "run":
		cmd, err := toolexecCmd(command, args)
		defer func() {
			if err := os.RemoveAll(os.Getenv("GARBLE_SHARED")); err != nil {
				fmt.Fprintf(os.Stderr, "could not clean up GARBLE_SHARED: %v\n", err)
			}
			// skip the trim if we didn't even start a build
			if sharedCache != nil {
				fsCache, err := openCache()
				if err == nil {
					err = fsCache.Trim()
				}
				if err != nil {
					fmt.Fprintf(os.Stderr, "could not trim GARBLE_CACHE: %v\n", err)
				}
			}
		}()
		if err != nil {
			return err
		}
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		log.Printf("calling via toolexec: %s", cmd)
		return cmd.Run()

	case "toolexec":
		_, tool := filepath.Split(args[0])
		if runtime.GOOS == "windows" {
			tool = strings.TrimSuffix(tool, ".exe")
		}
		transform := transformMethods[tool]
		transformed := args[1:]
		if transform != nil {
			startTime := time.Now()
			log.Printf("transforming %s with args: %s", tool, strings.Join(transformed, " "))

			// We're in a toolexec sub-process, not directly called by the user.
			// Load the shared data and wrap the tool, like the compiler or linker.
			if err := loadSharedCache(); err != nil {
				return err
			}

			if len(args) == 2 && args[1] == "-V=full" {
				return alterToolVersion(tool, args)
			}
			var tf transformer
			toolexecImportPath := os.Getenv("TOOLEXEC_IMPORTPATH")
			tf.curPkg = sharedCache.ListedPackages[toolexecImportPath]
			if tf.curPkg == nil {
				return fmt.Errorf("TOOLEXEC_IMPORTPATH not found in listed packages: %s", toolexecImportPath)
			}
			tf.origImporter = importerForPkg(tf.curPkg)

			var err error
			if transformed, err = transform(&tf, transformed); err != nil {
				return err
			}
			log.Printf("transformed args for %s in %s: %s", tool, debugSince(startTime), strings.Join(transformed, " "))
		} else {
			log.Printf("skipping transform on %s with args: %s", tool, strings.Join(transformed, " "))
		}

		executablePath := args[0]
		if tool == "link" {
			modifiedLinkPath, unlock, err := linker.PatchLinker(sharedCache.GoEnv.GOROOT, sharedCache.GoEnv.GOVERSION, sharedCache.CacheDir, sharedTempDir)
			if err != nil {
				return fmt.Errorf("cannot get modified linker: %v", err)
			}
			defer unlock()

			executablePath = modifiedLinkPath
			os.Setenv(linker.MagicValueEnv, strconv.FormatUint(uint64(magicValue()), 10))
			os.Setenv(linker.EntryOffKeyEnv, strconv.FormatUint(uint64(entryOffKey()), 10))
			if flagTiny {
				os.Setenv(linker.TinyEnv, "true")
			}

			log.Printf("replaced linker with: %s", executablePath)
		}

		cmd := exec.Command(executablePath, transformed...)
		cmd.Stdout = os.Stdout
		cmd.Stderr = os.Stderr
		if err := cmd.Run(); err != nil {
			return err
		}
		return nil
	default:
		return fmt.Errorf("unknown command: %q", command)
	}
}

func hasHelpFlag(flags []string) bool {
	for _, f := range flags {
		switch f {
		case "-h", "-help", "--help":
			return true
		}
	}
	return false
}

// toolexecCmd builds an *exec.Cmd which is set up for running "go <command>"
// with -toolexec=garble and the supplied arguments.
//
// Note that it uses and modifies global state; in general, it should only be
// called once from mainErr in the top-level garble process.
func toolexecCmd(command string, args []string) (*exec.Cmd, error) {
	// Split the flags from the package arguments, since we'll need
	// to run 'go list' on the same set of packages.
	flags, args := splitFlagsFromArgs(args)
	if hasHelpFlag(flags) {
		out, _ := exec.Command("go", command, "-h").CombinedOutput()
		fmt.Fprintf(os.Stderr, `
usage: garble [garble flags] %s [arguments]

This command wraps "go %s". Below is its help:

%s`[1:], command, command, out)
		return nil, errJustExit(2)
	}
	for _, flag := range flags {
		if rxGarbleFlag.MatchString(flag) {
			return nil, fmt.Errorf("garble flags must precede command, like: garble %s build ./pkg", flag)
		}
	}

	// Here is the only place we initialize the cache.
	// The sub-processes will parse it from a shared gob file.
	sharedCache = &sharedCacheType{}

	// Note that we also need to pass build flags to 'go list', such
	// as -tags.
	sharedCache.ForwardBuildFlags, _ = filterForwardBuildFlags(flags)
	if command == "test" {
		sharedCache.ForwardBuildFlags = append(sharedCache.ForwardBuildFlags, "-test")
	}

	if err := fetchGoEnv(); err != nil {
		return nil, err
	}

	if !goVersionOK() {
		return nil, errJustExit(1)
	}

	var err error
	sharedCache.ExecPath, err = os.Executable()
	if err != nil {
		return nil, err
	}

	// Always an absolute directory; defaults to e.g. "~/.cache/garble".
	if dir := os.Getenv("GARBLE_CACHE"); dir != "" {
		sharedCache.CacheDir, err = filepath.Abs(dir)
		if err != nil {
			return nil, err
		}
	} else {
		parentDir, err := os.UserCacheDir()
		if err != nil {
			return nil, err
		}
		sharedCache.CacheDir = filepath.Join(parentDir, "garble")
	}

	binaryBuildID, err := buildidOf(sharedCache.ExecPath)
	if err != nil {
		return nil, err
	}
	sharedCache.BinaryContentID = decodeBuildIDHash(splitContentID(binaryBuildID))

	if err := appendListedPackages(args, true); err != nil {
		return nil, err
	}

	sharedTempDir, err = saveSharedCache()
	if err != nil {
		return nil, err
	}
	os.Setenv("GARBLE_SHARED", sharedTempDir)
	wd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	os.Setenv("GARBLE_PARENT_WORK", wd)

	if flagDebugDir != "" {
		if !filepath.IsAbs(flagDebugDir) {
			flagDebugDir = filepath.Join(wd, flagDebugDir)
		}

		if err := os.RemoveAll(flagDebugDir); err != nil {
			return nil, fmt.Errorf("could not empty debugdir: %v", err)
		}
		if err := os.MkdirAll(flagDebugDir, 0o755); err != nil {
			return nil, err
		}
	}

	goArgs := append([]string{command}, garbleBuildFlags...)

	// Pass the garble flags down to each toolexec invocation.
	// This way, all garble processes see the same flag values.
	// Note that we can end up with a single argument to `go` in the form of:
	//
	//	-toolexec='/binary dir/garble' -tiny toolexec
	//
	// We quote the absolute path to garble if it contains spaces.
	// We can add extra flags to the end of the same -toolexec argument.
	var toolexecFlag strings.Builder
	toolexecFlag.WriteString("-toolexec=")
	quotedExecPath, err := cmdgoQuotedJoin([]string{sharedCache.ExecPath})
	if err != nil {
		// Can only happen if the absolute path to the garble binary contains
		// both single and double quotes. Seems extremely unlikely.
		return nil, err
	}
	toolexecFlag.WriteString(quotedExecPath)
	appendFlags(&toolexecFlag, false)
	toolexecFlag.WriteString(" toolexec")
	goArgs = append(goArgs, toolexecFlag.String())

	if flagControlFlow {
		goArgs = append(goArgs, "-debug-actiongraph", filepath.Join(sharedTempDir, actionGraphFileName))
	}
	if flagDebugDir != "" {
		// In case the user deletes the debug directory,
		// and a previous build is cached,
		// rebuild all packages to re-fill the debug dir.
		goArgs = append(goArgs, "-a")
	}
	if command == "test" {
		// vet is generally not useful on obfuscated code; keep it
		// disabled by default.
		goArgs = append(goArgs, "-vet=off")
	}
	goArgs = append(goArgs, flags...)
	goArgs = append(goArgs, args...)

	return exec.Command("go", goArgs...), nil
}

var transformMethods = map[string]func(*transformer, []string) ([]string, error){
	"asm":     (*transformer).transformAsm,
	"compile": (*transformer).transformCompile,
	"link":    (*transformer).transformLink,
}

func (tf *transformer) transformAsm(args []string) ([]string, error) {
	flags, paths := splitFlagsFromFiles(args, ".s")

	// When assembling, the import path can make its way into the output object file.
	if tf.curPkg.Name != "main" && tf.curPkg.ToObfuscate {
		flags = flagSetValue(flags, "-p", tf.curPkg.obfuscatedImportPath())
	}

	flags = alterTrimpath(flags)

	// The assembler runs twice; the first with -gensymabis,
	// where we continue below and we obfuscate all the source.
	// The second time, without -gensymabis, we reconstruct the paths to the
	// obfuscated source files and reuse them to avoid work.
	newPaths := make([]string, 0, len(paths))
	if !slices.Contains(args, "-gensymabis") {
		for _, path := range paths {
			name := hashWithPackage(tf.curPkg, filepath.Base(path)) + ".s"
			pkgDir := filepath.Join(sharedTempDir, tf.curPkg.obfuscatedImportPath())
			newPath := filepath.Join(pkgDir, name)
			newPaths = append(newPaths, newPath)
		}
		return append(flags, newPaths...), nil
	}

	const missingHeader = "missing header path"
	newHeaderPaths := make(map[string]string)
	var buf, includeBuf bytes.Buffer
	for _, path := range paths {
		buf.Reset()
		f, err := os.Open(path)
		if err != nil {
			return nil, err
		}
		defer f.Close() // in case of error
		scanner := bufio.NewScanner(f)
		for scanner.Scan() {
			line := scanner.Text()

			// Whole-line comments might be directives, leave them in place.
			// For example: //go:build race
			// Any other comment, including inline ones, can be discarded entirely.
			line, comment, hasComment := strings.Cut(line, "//")
			if hasComment && line == "" {
				buf.WriteString("//")
				buf.WriteString(comment)
				buf.WriteByte('\n')
				continue
			}

			// Preprocessor lines to include another file.
			// For example: #include "foo.h"
			if quoted := strings.TrimPrefix(line, "#include"); quoted != line {
				quoted = strings.TrimSpace(quoted)
				path, err := strconv.Unquote(quoted)
				if err != nil { // note that strconv.Unquote errors do not include the input string
					return nil, fmt.Errorf("cannot unquote %q: %v", quoted, err)
				}
				newPath := newHeaderPaths[path]
				switch newPath {
				case missingHeader: // no need to try again
					buf.WriteString(line)
					buf.WriteByte('\n')
					continue
				case "": // first time we see this header
					includeBuf.Reset()
					content, err := os.ReadFile(path)
					if errors.Is(err, fs.ErrNotExist) {
						newHeaderPaths[path] = missingHeader
						buf.WriteString(line)
						buf.WriteByte('\n')
						continue // a header file provided by Go or the system
					} else if err != nil {
						return nil, err
					}
					tf.replaceAsmNames(&includeBuf, content)

					// For now, we replace `foo.h` or `dir/foo.h` with `garbled_foo.h`.
					// The different name ensures we don't use the unobfuscated file.
					// This is far from perfect, but does the job for the time being.
					// In the future, use a randomized name.
					basename := filepath.Base(path)
					newPath = "garbled_" + basename

					if _, err := tf.writeSourceFile(basename, newPath, includeBuf.Bytes()); err != nil {
						return nil, err
					}
					newHeaderPaths[path] = newPath
				}
				buf.WriteString("#include ")
				buf.WriteString(strconv.Quote(newPath))
				buf.WriteByte('\n')
				continue
			}

			// Anything else is regular assembly; replace the names.
			tf.replaceAsmNames(&buf, []byte(line))
			buf.WriteByte('\n')
		}
		if err := scanner.Err(); err != nil {
			return nil, err
		}

		// With assembly files, we obfuscate the filename in the temporary
		// directory, as assembly files do not support `/*line` directives.
		// TODO(mvdan): per cmd/asm/internal/lex, they do support `#line`.
		basename := filepath.Base(path)
		newName := hashWithPackage(tf.curPkg, basename) + ".s"
		if path, err := tf.writeSourceFile(basename, newName, buf.Bytes()); err != nil {
			return nil, err
		} else {
			newPaths = append(newPaths, path)
		}
		f.Close() // do not keep len(paths) files open
	}

	return append(flags, newPaths...), nil
}

func (tf *transformer) replaceAsmNames(buf *bytes.Buffer, remaining []byte) {
	// We need to replace all function references with their obfuscated name
	// counterparts.
	// Luckily, all func names in Go assembly files are immediately followed
	// by the unicode "middle dot", like:
	//
	//	TEXT ·privateAdd(SB),$0-24
	//	TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-12
	//
	// Note that import paths in assembly, like `runtime∕internal∕sys` above,
	// use Unicode periods and slashes rather than the ASCII ones used by `go list`.
	// We need to convert to ASCII to find the right package information.
	const (
		asmPeriod = '·'
		goPeriod  = '.'
		asmSlash  = '∕'
		goSlash   = '/'
	)
	asmPeriodLen := utf8.RuneLen(asmPeriod)

	for {
		periodIdx := bytes.IndexRune(remaining, asmPeriod)
		if periodIdx < 0 {
			buf.Write(remaining)
			remaining = nil
			break
		}

		// The package name ends at the first rune which cannot be part of a Go
		// import path, such as a comma or space.
		pkgStart := periodIdx
		for pkgStart >= 0 {
			c, size := utf8.DecodeLastRune(remaining[:pkgStart])
			if !unicode.IsLetter(c) && c != '_' && c != asmSlash && !unicode.IsDigit(c) {
				break
			}
			pkgStart -= size
		}
		// The package name might actually be longer, e.g:
		//
		//	JMP test∕with·many·dots∕main∕imported·PublicAdd(SB)
		//
		// We have `test∕with` so far; grab `·many·dots∕main∕imported` as well.
		pkgEnd := periodIdx
		lastAsmPeriod := -1
		for i := pkgEnd + asmPeriodLen; i <= len(remaining); {
			c, size := utf8.DecodeRune(remaining[i:])
			if c == asmPeriod {
				lastAsmPeriod = i
			} else if !unicode.IsLetter(c) && c != '_' && c != asmSlash && !unicode.IsDigit(c) {
				if lastAsmPeriod > 0 {
					pkgEnd = lastAsmPeriod
				}
				break
			}
			i += size
		}
		asmPkgPath := string(remaining[pkgStart:pkgEnd])

		// Write the bytes before our unqualified `·foo` or qualified `pkg·foo`.
		buf.Write(remaining[:pkgStart])

		// If the name was qualified, fetch the package, and write the
		// obfuscated import path if needed.
		// Note that we don't obfuscate the package path "main".
		lpkg := tf.curPkg
		if asmPkgPath != "" && asmPkgPath != "main" {
			if asmPkgPath != tf.curPkg.Name {
				goPkgPath := asmPkgPath
				goPkgPath = strings.ReplaceAll(goPkgPath, string(asmPeriod), string(goPeriod))
				goPkgPath = strings.ReplaceAll(goPkgPath, string(asmSlash), string(goSlash))
				var err error
				lpkg, err = listPackage(tf.curPkg, goPkgPath)
				if err != nil {
					panic(err) // shouldn't happen
				}
			}
			if lpkg.ToObfuscate {
				// Note that we don't need to worry about asmSlash here,
				// because our obfuscated import paths contain no slashes right now.
				buf.WriteString(lpkg.obfuscatedImportPath())
			} else {
				buf.WriteString(asmPkgPath)
			}
		}

		// Write the middle dot and advance the remaining slice.
		buf.WriteRune(asmPeriod)
		remaining = remaining[pkgEnd+asmPeriodLen:]

		// The declared name ends at the first rune which cannot be part of a Go
		// identifier, such as a comma or space.
		nameEnd := 0
		for nameEnd < len(remaining) {
			c, size := utf8.DecodeRune(remaining[nameEnd:])
			if !unicode.IsLetter(c) && c != '_' && !unicode.IsDigit(c) {
				break
			}
			nameEnd += size
		}
		name := string(remaining[:nameEnd])
		remaining = remaining[nameEnd:]

		if lpkg.ToObfuscate && !compilerIntrinsicsFuncs[lpkg.ImportPath+"."+name] {
			newName := hashWithPackage(lpkg, name)
			if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
				log.Printf("asm name %q hashed with %x to %q", name, tf.curPkg.GarbleActionID, newName)
			}
			buf.WriteString(newName)
		} else {
			buf.WriteString(name)
		}
	}
}

// writeSourceFile is a mix between os.CreateTemp and os.WriteFile, as it writes a
// named source file in sharedTempDir given an input buffer.
//
// Note that the file is created under a directory tree following curPkg's
// import path, mimicking how files are laid out in modules and GOROOT.
func (tf *transformer) writeSourceFile(basename, obfuscated string, content []byte) (string, error) {
	// Uncomment for some quick debugging. Do not delete.
	// fmt.Fprintf(os.Stderr, "\n-- %s/%s --\n%s", curPkg.ImportPath, basename, content)

	if flagDebugDir != "" {
		pkgDir := filepath.Join(flagDebugDir, filepath.FromSlash(tf.curPkg.ImportPath))
		if err := os.MkdirAll(pkgDir, 0o755); err != nil {
			return "", err
		}
		dstPath := filepath.Join(pkgDir, basename)
		if err := os.WriteFile(dstPath, content, 0o666); err != nil {
			return "", err
		}
	}
	// We use the obfuscated import path to hold the temporary files.
	// Assembly files do not support line directives to set positions,
	// so the only way to not leak the import path is to replace it.
	pkgDir := filepath.Join(sharedTempDir, tf.curPkg.obfuscatedImportPath())
	if err := os.MkdirAll(pkgDir, 0o777); err != nil {
		return "", err
	}
	dstPath := filepath.Join(pkgDir, obfuscated)
	if err := writeFileExclusive(dstPath, content); err != nil {
		return "", err
	}
	return dstPath, nil
}

// parseFiles parses a list of Go files.
// It supports relative file paths, such as those found in listedPackage.CompiledGoFiles,
// as long as dir is set to listedPackage.Dir.
func parseFiles(dir string, paths []string) ([]*ast.File, error) {
	var files []*ast.File
	for _, path := range paths {
		if !filepath.IsAbs(path) {
			path = filepath.Join(dir, path)
		}
		file, err := parser.ParseFile(fset, path, nil, parser.SkipObjectResolution|parser.ParseComments)
		if err != nil {
			return nil, err
		}
		files = append(files, file)
	}
	return files, nil
}

func (tf *transformer) transformCompile(args []string) ([]string, error) {
	flags, paths := splitFlagsFromFiles(args, ".go")

	// We will force the linker to drop DWARF via -w, so don't spend time
	// generating it.
	flags = append(flags, "-dwarf=false")

	// The Go file paths given to the compiler are always absolute paths.
	files, err := parseFiles("", paths)
	if err != nil {
		return nil, err
	}

	// Literal and control flow obfuscation uses math/rand, so seed it deterministically.
	randSeed := tf.curPkg.GarbleActionID[:]
	if flagSeed.present() {
		randSeed = flagSeed.bytes
	}
	// log.Printf("seeding math/rand with %x\n", randSeed)
	tf.obfRand = mathrand.New(mathrand.NewSource(int64(binary.BigEndian.Uint64(randSeed))))

	// Even if loadPkgCache below finds a direct cache hit,
	// other parts of garble still need type information to obfuscate.
	// We could potentially avoid this by saving the type info we need in the cache,
	// although in general that wouldn't help much, since it's rare for Go's cache
	// to miss on a package and for our cache to hit.
	if tf.pkg, tf.info, err = typecheck(tf.curPkg.ImportPath, files, tf.origImporter); err != nil {
		return nil, err
	}

	var (
		ssaPkg       *ssa.Package
		requiredPkgs []string
	)
	if flagControlFlow {
		ssaPkg = ssaBuildPkg(tf.pkg, files, tf.info)

		newFileName, newFile, affectedFiles, err := ctrlflow.Obfuscate(fset, ssaPkg, files, tf.obfRand)
		if err != nil {
			return nil, err
		}

		if newFile != nil {
			files = append(files, newFile)
			paths = append(paths, newFileName)
			for _, file := range affectedFiles {
				tf.useAllImports(file)
			}
			if tf.pkg, tf.info, err = typecheck(tf.curPkg.ImportPath, files, tf.origImporter); err != nil {
				return nil, err
			}

			for _, imp := range newFile.Imports {
				path, err := strconv.Unquote(imp.Path.Value)
				if err != nil {
					panic(err) // should never happen
				}
				requiredPkgs = append(requiredPkgs, path)
			}
		}
	}

	if tf.curPkgCache, err = loadPkgCache(tf.curPkg, tf.pkg, files, tf.info, ssaPkg); err != nil {
		return nil, err
	}

	// These maps are not kept in pkgCache, since they are only needed to obfuscate curPkg.
	tf.fieldToStruct = computeFieldToStruct(tf.info)
	if flagLiterals {
		if tf.linkerVariableStrings, err = computeLinkerVariableStrings(tf.pkg); err != nil {
			return nil, err
		}
	}

	flags = alterTrimpath(flags)
	newImportCfg, err := tf.processImportCfg(flags, requiredPkgs)
	if err != nil {
		return nil, err
	}

	// If this is a package to obfuscate, swap the -p flag with the new package path.
	// We don't if it's the main package, as that just uses "-p main".
	// We only set newPkgPath if we're obfuscating the import path,
	// to replace the original package name in the package clause below.
	newPkgPath := ""
	if tf.curPkg.Name != "main" && tf.curPkg.ToObfuscate {
		newPkgPath = tf.curPkg.obfuscatedImportPath()
		flags = flagSetValue(flags, "-p", newPkgPath)
	}

	newPaths := make([]string, 0, len(files))

	for i, file := range files {
		basename := filepath.Base(paths[i])
		log.Printf("obfuscating %s", basename)
		if tf.curPkg.ImportPath == "runtime" {
			if flagTiny {
				// strip unneeded runtime code
				stripRuntime(basename, file)
				tf.useAllImports(file)
			}
			if basename == "symtab.go" {
				updateMagicValue(file, magicValue())
				updateEntryOffset(file, entryOffKey())
			}
		}
		tf.transformDirectives(file.Comments)
		file = tf.transformGoFile(file)
		// newPkgPath might be the original ImportPath in some edge cases like
		// compilerIntrinsics; we don't want to use slashes in package names.
		// TODO: when we do away with those edge cases, only check the string is
		// non-empty.
		if newPkgPath != "" && newPkgPath != tf.curPkg.ImportPath {
			file.Name.Name = newPkgPath
		}

		src, err := printFile(tf.curPkg, file)
		if err != nil {
			return nil, err
		}

		// We hide Go source filenames via "//line" directives,
		// so there is no need to use obfuscated filenames here.
		if path, err := tf.writeSourceFile(basename, basename, src); err != nil {
			return nil, err
		} else {
			newPaths = append(newPaths, path)
		}
	}
	flags = flagSetValue(flags, "-importcfg", newImportCfg)

	return append(flags, newPaths...), nil
}

// transformDirectives rewrites //go:linkname toolchain directives in comments
// to replace names with their obfuscated versions.
func (tf *transformer) transformDirectives(comments []*ast.CommentGroup) {
	for _, group := range comments {
		for _, comment := range group.List {
			if !strings.HasPrefix(comment.Text, "//go:linkname ") {
				continue
			}

			// We can have either just one argument:
			//
			//	//go:linkname localName
			//
			// Or two arguments, where the second may refer to a name in a
			// different package:
			//
			//	//go:linkname localName newName
			//	//go:linkname localName pkg.newName
			fields := strings.Fields(comment.Text)
			localName := fields[1]
			newName := ""
			if len(fields) == 3 {
				newName = fields[2]
			}

			localName, newName = tf.transformLinkname(localName, newName)
			fields[1] = localName
			if len(fields) == 3 {
				fields[2] = newName
			}

			if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
				log.Printf("linkname %q changed to %q", comment.Text, strings.Join(fields, " "))
			}
			comment.Text = strings.Join(fields, " ")
		}
	}
}

func (tf *transformer) transformLinkname(localName, newName string) (string, string) {
	// obfuscate the local name, if the current package is obfuscated
	if tf.curPkg.ToObfuscate && !compilerIntrinsicsFuncs[tf.curPkg.ImportPath+"."+localName] {
		localName = hashWithPackage(tf.curPkg, localName)
	}
	if newName == "" {
		return localName, ""
	}
	// If the new name is of the form "pkgpath.Name", and we've obfuscated
	// "Name" in that package, rewrite the directive to use the obfuscated name.
	dotCnt := strings.Count(newName, ".")
	if dotCnt < 1 {
		// cgo-generated code uses linknames to made up symbol names,
		// which do not have a package path at all.
		// Replace the comment in case the local name was obfuscated.
		return localName, newName
	}
	switch newName {
	case "main.main", "main..inittask", "runtime..inittask":
		// The runtime uses some special symbols with "..".
		// We aren't touching those at the moment.
		return localName, newName
	}

	pkgSplit := 0
	var lpkg *listedPackage
	var foreignName string
	for {
		i := strings.Index(newName[pkgSplit:], ".")
		if i < 0 {
			// We couldn't find a prefix that matched a known package.
			// Probably a made up name like above, but with a dot.
			return localName, newName
		}
		pkgSplit += i
		pkgPath := newName[:pkgSplit]
		pkgSplit++ // skip over the dot

		if strings.HasSuffix(pkgPath, "_test") {
			// runtime uses a go:linkname to metrics_test;
			// we don't need this to work for now on regular builds,
			// though we might need to rethink this if we want "go test std" to work.
			continue
		}

		var err error
		lpkg, err = listPackage(tf.curPkg, pkgPath)
		if err == nil {
			foreignName = newName[pkgSplit:]
			break
		}
		if errors.Is(err, ErrNotFound) {
			// No match; find the next dot.
			continue
		}
		if errors.Is(err, ErrNotDependency) {
			fmt.Fprintf(os.Stderr,
				"//go:linkname refers to %s - add `import _ %q` for garble to find the package",
				newName, pkgPath)
			return localName, newName
		}
		panic(err) // shouldn't happen
	}

	if !lpkg.ToObfuscate || compilerIntrinsicsFuncs[lpkg.ImportPath+"."+foreignName] {
		// We're not obfuscating that package or name.
		return localName, newName
	}

	var newForeignName string
	if receiver, name, ok := strings.Cut(foreignName, "."); ok {
		if lpkg.ImportPath == "reflect" && (receiver == "(*rtype)" || receiver == "Value") {
			// These receivers are not obfuscated.
			// See the TODO below.
		} else if strings.HasPrefix(receiver, "(*") {
			// pkg/path.(*Receiver).method
			receiver = strings.TrimPrefix(receiver, "(*")
			receiver = strings.TrimSuffix(receiver, ")")
			receiver = "(*" + hashWithPackage(lpkg, receiver) + ")"
		} else {
			// pkg/path.Receiver.method
			receiver = hashWithPackage(lpkg, receiver)
		}
		// Exported methods are never obfuscated.
		//
		// TODO(mvdan): We're duplicating the logic behind these decisions.
		// Reuse the logic with transformCompile.
		if !token.IsExported(name) {
			name = hashWithPackage(lpkg, name)
		}
		newForeignName = receiver + "." + name
	} else {
		// pkg/path.function
		newForeignName = hashWithPackage(lpkg, foreignName)
	}

	newPkgPath := lpkg.ImportPath
	if newPkgPath != "main" {
		newPkgPath = lpkg.obfuscatedImportPath()
	}
	newName = newPkgPath + "." + newForeignName
	return localName, newName
}

// processImportCfg parses the importcfg file passed to a compile or link step.
// It also builds a new importcfg file to account for obfuscated import paths.
func (tf *transformer) processImportCfg(flags []string, requiredPkgs []string) (newImportCfg string, _ error) {
	importCfg := flagValue(flags, "-importcfg")
	if importCfg == "" {
		return "", fmt.Errorf("could not find -importcfg argument")
	}
	data, err := os.ReadFile(importCfg)
	if err != nil {
		return "", err
	}

	var packagefiles, importmaps [][2]string

	// using for track required but not imported packages
	var newIndirectImports map[string]bool
	if requiredPkgs != nil {
		newIndirectImports = make(map[string]bool)
		for _, pkg := range requiredPkgs {
			newIndirectImports[pkg] = true
		}
	}

	for _, line := range strings.Split(string(data), "\n") {
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		verb, args, found := strings.Cut(line, " ")
		if !found {
			continue
		}
		switch verb {
		case "importmap":
			beforePath, afterPath, found := strings.Cut(args, "=")
			if !found {
				continue
			}
			importmaps = append(importmaps, [2]string{beforePath, afterPath})
		case "packagefile":
			importPath, objectPath, found := strings.Cut(args, "=")
			if !found {
				continue
			}
			packagefiles = append(packagefiles, [2]string{importPath, objectPath})
			delete(newIndirectImports, importPath)
		}
	}

	// Produce the modified importcfg file.
	// This is mainly replacing the obfuscated paths.
	// Note that we range over maps, so this is non-deterministic, but that
	// should not matter as the file is treated like a lookup table.
	newCfg, err := os.CreateTemp(sharedTempDir, "importcfg")
	if err != nil {
		return "", err
	}
	for _, pair := range importmaps {
		beforePath, afterPath := pair[0], pair[1]
		lpkg, err := listPackage(tf.curPkg, beforePath)
		if err != nil {
			return "", err
		}
		if lpkg.ToObfuscate {
			// Note that beforePath is not the canonical path.
			// For beforePath="vendor/foo", afterPath and
			// lpkg.ImportPath can be just "foo".
			// Don't use obfuscatedImportPath here.
			beforePath = hashWithPackage(lpkg, beforePath)

			afterPath = lpkg.obfuscatedImportPath()
		}
		fmt.Fprintf(newCfg, "importmap %s=%s\n", beforePath, afterPath)
	}

	if len(newIndirectImports) > 0 {
		f, err := os.Open(filepath.Join(sharedTempDir, actionGraphFileName))
		if err != nil {
			return "", fmt.Errorf("cannot open action graph file: %v", err)
		}
		defer f.Close()

		var actions []struct {
			Mode    string
			Package string
			Objdir  string
		}
		if err := json.NewDecoder(f).Decode(&actions); err != nil {
			return "", fmt.Errorf("cannot parse action graph file: %v", err)
		}

		// theoretically action graph can be long, to optimise it process it in one pass
		// with an early exit when all the required imports are found
		for _, action := range actions {
			if action.Mode != "build" {
				continue
			}
			if ok := newIndirectImports[action.Package]; !ok {
				continue
			}

			packagefiles = append(packagefiles, [2]string{action.Package, filepath.Join(action.Objdir, "_pkg_.a")}) // file name hardcoded in compiler
			delete(newIndirectImports, action.Package)
			if len(newIndirectImports) == 0 {
				break
			}
		}

		if len(newIndirectImports) > 0 {
			return "", fmt.Errorf("cannot resolve required packages from action graph file: %v", requiredPkgs)
		}
	}

	for _, pair := range packagefiles {
		impPath, pkgfile := pair[0], pair[1]
		lpkg, err := listPackage(tf.curPkg, impPath)
		if err != nil {
			// TODO: it's unclear why an importcfg can include an import path
			// that's not a dependency in an edge case with "go test ./...".
			// See exporttest/*.go in testdata/scripts/test.txt.
			// For now, spot the pattern and avoid the unnecessary error;
			// the dependency is unused, so the packagefile line is redundant.
			// This still triggers as of go1.21.
			if strings.HasSuffix(tf.curPkg.ImportPath, ".test]") && strings.HasPrefix(tf.curPkg.ImportPath, impPath) {
				continue
			}
			return "", err
		}
		if lpkg.Name != "main" {
			impPath = lpkg.obfuscatedImportPath()
		}
		fmt.Fprintf(newCfg, "packagefile %s=%s\n", impPath, pkgfile)
	}

	// Uncomment to debug the transformed importcfg. Do not delete.
	// newCfg.Seek(0, 0)
	// io.Copy(os.Stderr, newCfg)

	if err := newCfg.Close(); err != nil {
		return "", err
	}
	return newCfg.Name(), nil
}

type (
	funcFullName = string // as per go/types.Func.FullName
	objectString = string // as per recordedObjectString

	typeName struct {
		PkgPath string // empty if builtin
		Name    string
	}
)

// pkgCache contains information about a package that will be stored in fsCache.
// Note that pkgCache is "deep", containing information about all packages
// which are transitive dependencies as well.
type pkgCache struct {
	// ReflectAPIs is a static record of what std APIs use reflection on their
	// parameters, so we can avoid obfuscating types used with them.
	//
	// TODO: we're not including fmt.Printf, as it would have many false positives,
	// unless we were smart enough to detect which arguments get used as %#v or %T.
	ReflectAPIs map[funcFullName]map[int]bool

	// ReflectObjects is filled with the fully qualified names from each
	// package that we cannot obfuscate due to reflection.
	// The included objects are named types and their fields,
	// since it is those names being obfuscated that could break the use of reflect.
	//
	// This record is necessary for knowing what names from imported packages
	// weren't obfuscated, so we can obfuscate their local uses accordingly.
	ReflectObjects map[objectString]struct{}

	// EmbeddedAliasFields records which embedded fields use a type alias.
	// They are the only instance where a type alias matters for obfuscation,
	// because the embedded field name is derived from the type alias itself,
	// and not the type that the alias points to.
	// In that way, the type alias is obfuscated as a form of named type,
	// bearing in mind that it may be owned by a different package.
	EmbeddedAliasFields map[objectString]typeName
}

func (c *pkgCache) CopyFrom(c2 pkgCache) {
	maps.Copy(c.ReflectAPIs, c2.ReflectAPIs)
	maps.Copy(c.ReflectObjects, c2.ReflectObjects)
	maps.Copy(c.EmbeddedAliasFields, c2.EmbeddedAliasFields)
}

func ssaBuildPkg(pkg *types.Package, files []*ast.File, info *types.Info) *ssa.Package {
	// Create SSA packages for all imports. Order is not significant.
	ssaProg := ssa.NewProgram(fset, 0)
	created := make(map[*types.Package]bool)
	var createAll func(pkgs []*types.Package)
	createAll = func(pkgs []*types.Package) {
		for _, p := range pkgs {
			if !created[p] {
				created[p] = true
				ssaProg.CreatePackage(p, nil, nil, true)
				createAll(p.Imports())
			}
		}
	}
	createAll(pkg.Imports())

	ssaPkg := ssaProg.CreatePackage(pkg, files, info, false)
	ssaPkg.Build()
	return ssaPkg
}

func openCache() (*cache.Cache, error) {
	// Use a subdirectory for the hashed build cache, to clarify what it is,
	// and to allow us to have other directories or files later on without mixing.
	dir := filepath.Join(sharedCache.CacheDir, "build")
	if err := os.MkdirAll(dir, 0o777); err != nil {
		return nil, err
	}
	return cache.Open(dir)
}

func loadPkgCache(lpkg *listedPackage, pkg *types.Package, files []*ast.File, info *types.Info, ssaPkg *ssa.Package) (pkgCache, error) {
	fsCache, err := openCache()
	if err != nil {
		return pkgCache{}, err
	}
	filename, _, err := fsCache.GetFile(lpkg.GarbleActionID)
	// Already in the cache; load it directly.
	if err == nil {
		f, err := os.Open(filename)
		if err != nil {
			return pkgCache{}, err
		}
		defer f.Close()
		var loaded pkgCache
		if err := gob.NewDecoder(f).Decode(&loaded); err != nil {
			return pkgCache{}, fmt.Errorf("gob decode: %w", err)
		}
		return loaded, nil
	}
	return computePkgCache(fsCache, lpkg, pkg, files, info, ssaPkg)
}

func computePkgCache(fsCache *cache.Cache, lpkg *listedPackage, pkg *types.Package, files []*ast.File, info *types.Info, ssaPkg *ssa.Package) (pkgCache, error) {
	// Not yet in the cache. Load the cache entries for all direct dependencies,
	// build our cache entry, and write it to disk.
	// Note that practically all errors from Cache.GetFile are a cache miss;
	// for example, a file might exist but be empty if another process
	// is filling the same cache entry concurrently.
	//
	// TODO: if A (curPkg) imports B and C, and B also imports C,
	// then loading the gob files from both B and C is unnecessary;
	// loading B's gob file would be enough. Is there an easy way to do that?
	computed := pkgCache{
		ReflectAPIs: map[funcFullName]map[int]bool{
			"reflect.TypeOf":  {0: true},
			"reflect.ValueOf": {0: true},
		},
		ReflectObjects:      map[objectString]struct{}{},
		EmbeddedAliasFields: map[objectString]typeName{},
	}
	for _, imp := range lpkg.Imports {
		if imp == "C" {
			// `go list -json` shows "C" in Imports but not Deps.
			// See https://go.dev/issue/60453.
			continue
		}
		// Shadowing lpkg ensures we don't use the wrong listedPackage below.
		lpkg, err := listPackage(lpkg, imp)
		if err != nil {
			return computed, err
		}
		if lpkg.BuildID == "" {
			continue // nothing to load
		}
		if err := func() error { // function literal for the deferred close
			if filename, _, err := fsCache.GetFile(lpkg.GarbleActionID); err == nil {
				// Cache hit; append new entries to computed.
				f, err := os.Open(filename)
				if err != nil {
					return err
				}
				defer f.Close()
				if err := gob.NewDecoder(f).Decode(&computed); err != nil {
					return fmt.Errorf("gob decode: %w", err)
				}
				return nil
			}
			// Missing or corrupted entry in the cache for a dependency.
			// Could happen if GARBLE_CACHE was emptied but GOCACHE was not.
			// Compute it, which can recurse if many entries are missing.
			files, err := parseFiles(lpkg.Dir, lpkg.CompiledGoFiles)
			if err != nil {
				return err
			}
			origImporter := importerForPkg(lpkg)
			pkg, info, err := typecheck(lpkg.ImportPath, files, origImporter)
			if err != nil {
				return err
			}
			computedImp, err := computePkgCache(fsCache, lpkg, pkg, files, info, nil)
			if err != nil {
				return err
			}
			computed.CopyFrom(computedImp)
			return nil
		}(); err != nil {
			return pkgCache{}, fmt.Errorf("pkgCache load for %s: %w", imp, err)
		}
	}

	// Fill EmbeddedAliasFields from the type info.
	for name, obj := range info.Uses {
		obj, ok := obj.(*types.TypeName)
		if !ok || !obj.IsAlias() {
			continue
		}
		vr, _ := info.Defs[name].(*types.Var)
		if vr == nil || !vr.Embedded() {
			continue
		}
		vrStr := recordedObjectString(vr)
		if vrStr == "" {
			continue
		}
		aliasTypeName := typeName{
			Name: obj.Name(),
		}
		if pkg := obj.Pkg(); pkg != nil {
			aliasTypeName.PkgPath = pkg.Path()
		}
		computed.EmbeddedAliasFields[vrStr] = aliasTypeName
	}

	// Fill the reflect info from SSA, which builds on top of the syntax tree and type info.
	inspector := reflectInspector{
		pkg:             pkg,
		checkedAPIs:     make(map[string]bool),
		propagatedInstr: map[ssa.Instruction]bool{},
		result:          computed, // append the results
	}
	if ssaPkg == nil {
		ssaPkg = ssaBuildPkg(pkg, files, info)
	}
	inspector.recordReflection(ssaPkg)

	// Unlikely that we could stream the gob encode, as cache.Put wants an io.ReadSeeker.
	var buf bytes.Buffer
	if err := gob.NewEncoder(&buf).Encode(computed); err != nil {
		return pkgCache{}, err
	}
	if err := fsCache.PutBytes(lpkg.GarbleActionID, buf.Bytes()); err != nil {
		return pkgCache{}, err
	}
	return computed, nil
}

// cmd/bundle will include a go:generate directive in its output by default.
// Ours specifies a version and doesn't assume bundle is in $PATH, so drop it.

//go:generate go run golang.org/x/tools/cmd/bundle -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted
//go:generate sed -i /go:generate/d cmdgo_quoted.go

// computeLinkerVariableStrings iterates over the -ldflags arguments,
// filling a map with all the string values set via the linker's -X flag.
// TODO: can we put this in sharedCache, using objectString as a key?
func computeLinkerVariableStrings(pkg *types.Package) (map[*types.Var]string, error) {
	linkerVariableStrings := make(map[*types.Var]string)

	// TODO: this is a linker flag that affects how we obfuscate a package at
	// compile time. Note that, if the user changes ldflags, then Go may only
	// re-link the final binary, without re-compiling any packages at all.
	// It's possible that this could result in:
	//
	//    garble -literals build -ldflags=-X=pkg.name=before # name="before"
	//    garble -literals build -ldflags=-X=pkg.name=after  # name="before" as cached
	//
	// We haven't been able to reproduce this problem for now,
	// but it's worth noting it and keeping an eye out for it in the future.
	// If we do confirm this theoretical bug,
	// the solution will be to either find a different solution for -literals,
	// or to force including -ldflags into the build cache key.
	ldflags, err := cmdgoQuotedSplit(flagValue(sharedCache.ForwardBuildFlags, "-ldflags"))
	if err != nil {
		return nil, err
	}
	flagValueIter(ldflags, "-X", func(val string) {
		// val is in the form of "foo.com/bar.name=value".
		fullName, stringValue, found := strings.Cut(val, "=")
		if !found {
			return // invalid
		}

		// fullName is "foo.com/bar.name"
		i := strings.LastIndexByte(fullName, '.')
		path, name := fullName[:i], fullName[i+1:]

		// -X represents the main package as "main", not its import path.
		if path != pkg.Path() && (path != "main" || pkg.Name() != "main") {
			return // not the current package
		}

		obj, _ := pkg.Scope().Lookup(name).(*types.Var)
		if obj == nil {
			return // no such variable; skip
		}
		linkerVariableStrings[obj] = stringValue
	})
	return linkerVariableStrings, nil
}

// transformer holds all the information and state necessary to obfuscate a
// single Go package.
type transformer struct {
	// curPkg holds basic information about the package being currently compiled or linked.
	curPkg *listedPackage

	// curPkgCache is the pkgCache for curPkg.
	curPkgCache pkgCache

	// The type-checking results; the package itself, and the Info struct.
	pkg  *types.Package
	info *types.Info

	// linkerVariableStrings records objects for variables used in -ldflags=-X flags,
	// as well as the strings the user wants to inject them with.
	// Used when obfuscating literals, so that we obfuscate the injected value.
	linkerVariableStrings map[*types.Var]string

	// fieldToStruct helps locate struct types from any of their field
	// objects. Useful when obfuscating field names.
	fieldToStruct map[*types.Var]*types.Struct

	// obfRand is initialized by transformCompile and used during obfuscation.
	// It is left nil at init time, so that we only use it after it has been
	// properly initialized with a deterministic seed.
	// It must only be used for deterministic obfuscation;
	// if it is used for any other purpose, we may lose determinism.
	obfRand *mathrand.Rand

	// origImporter is a go/types importer which uses the original versions
	// of packages, without any obfuscation. This is helpful to make
	// decisions on how to obfuscate our input code.
	origImporter importerWithMap

	// usedAllImportsFiles is used to prevent multiple calls of tf.useAllImports function on one file
	// in case of simultaneously applied control flow and literals obfuscation
	usedAllImportsFiles map[*ast.File]bool
}

func typecheck(pkgPath string, files []*ast.File, origImporter importerWithMap) (*types.Package, *types.Info, error) {
	info := &types.Info{
		Types:      make(map[ast.Expr]types.TypeAndValue),
		Defs:       make(map[*ast.Ident]types.Object),
		Uses:       make(map[*ast.Ident]types.Object),
		Implicits:  make(map[ast.Node]types.Object),
		Scopes:     make(map[ast.Node]*types.Scope),
		Selections: make(map[*ast.SelectorExpr]*types.Selection),
		Instances:  make(map[*ast.Ident]types.Instance),
	}
	// TODO(mvdan): we should probably set types.Config.GoVersion from go.mod
	origTypesConfig := types.Config{Importer: origImporter}
	pkg, err := origTypesConfig.Check(pkgPath, fset, files, info)
	if err != nil {
		return nil, nil, fmt.Errorf("typecheck error: %v", err)
	}
	return pkg, info, err
}

func computeFieldToStruct(info *types.Info) map[*types.Var]*types.Struct {
	done := make(map[*types.Named]bool)
	fieldToStruct := make(map[*types.Var]*types.Struct)

	// Run recordType on all types reachable via types.Info.
	// A bit hacky, but I could not find an easier way to do this.
	for _, obj := range info.Uses {
		if obj != nil {
			recordType(obj.Type(), nil, done, fieldToStruct)
		}
	}
	for _, obj := range info.Defs {
		if obj != nil {
			recordType(obj.Type(), nil, done, fieldToStruct)
		}
	}
	for _, tv := range info.Types {
		recordType(tv.Type, nil, done, fieldToStruct)
	}
	return fieldToStruct
}

// recordType visits every reachable type after typechecking a package.
// Right now, all it does is fill the fieldToStruct map.
// Since types can be recursive, we need a map to avoid cycles.
// We only need to track named types as done, as all cycles must use them.
func recordType(used, origin types.Type, done map[*types.Named]bool, fieldToStruct map[*types.Var]*types.Struct) {
	if origin == nil {
		origin = used
	}
	type Container interface{ Elem() types.Type }
	switch used := used.(type) {
	case Container:
		// origin may be a *types.TypeParam, which is not a Container.
		// For now, we haven't found a need to recurse in that case.
		// We can edit this code in the future if we find an example,
		// because we panic if a field is not in fieldToStruct.
		if origin, ok := origin.(Container); ok {
			recordType(used.Elem(), origin.Elem(), done, fieldToStruct)
		}
	case *types.Named:
		if done[used] {
			return
		}
		done[used] = true
		// If we have a generic struct like
		//
		//	type Foo[T any] struct { Bar T }
		//
		// then we want the hashing to use the original "Bar T",
		// because otherwise different instances like "Bar int" and "Bar bool"
		// will result in different hashes and the field names will break.
		// Ensure we record the original generic struct, if there is one.
		recordType(used.Underlying(), used.Origin().Underlying(), done, fieldToStruct)
	case *types.Struct:
		origin := origin.(*types.Struct)
		for i := range used.NumFields() {
			field := used.Field(i)
			fieldToStruct[field] = origin

			if field.Embedded() {
				recordType(field.Type(), origin.Field(i).Type(), done, fieldToStruct)
			}
		}
	}
}

// isSafeForInstanceType returns true if the passed type is safe for var declaration.
// Unsafe types: generic types and non-method interfaces.
func isSafeForInstanceType(typ types.Type) bool {
	switch t := typ.(type) {
	case *types.Named:
		if t.TypeParams().Len() > 0 {
			return false
		}
		return isSafeForInstanceType(t.Underlying())
	case *types.Signature:
		return t.TypeParams().Len() == 0
	case *types.Interface:
		return t.IsMethodSet()
	}
	return true
}

func (tf *transformer) useAllImports(file *ast.File) {
	if tf.usedAllImportsFiles == nil {
		tf.usedAllImportsFiles = make(map[*ast.File]bool)
	} else if ok := tf.usedAllImportsFiles[file]; ok {
		return
	}
	tf.usedAllImportsFiles[file] = true

	for _, imp := range file.Imports {
		if imp.Name != nil && imp.Name.Name == "_" {
			continue
		}

		pkgName := tf.info.PkgNameOf(imp)
		pkgScope := pkgName.Imported().Scope()
		var nameObj types.Object
		for _, name := range pkgScope.Names() {
			if obj := pkgScope.Lookup(name); obj.Exported() && isSafeForInstanceType(obj.Type()) {
				nameObj = obj
				break
			}
		}
		if nameObj == nil {
			// A very unlikely situation where there is no suitable declaration for a reference variable
			// and almost certainly means that there is another import reference in code.
			continue
		}
		spec := &ast.ValueSpec{Names: []*ast.Ident{ast.NewIdent("_")}}
		decl := &ast.GenDecl{Specs: []ast.Spec{spec}}

		nameIdent := ast.NewIdent(nameObj.Name())
		var nameExpr ast.Expr
		switch {
		case imp.Name == nil: // import "pkg/path"
			nameExpr = &ast.SelectorExpr{
				X:   ast.NewIdent(pkgName.Name()),
				Sel: nameIdent,
			}
		case imp.Name.Name != ".": // import path2 "pkg/path"
			nameExpr = &ast.SelectorExpr{
				X:   ast.NewIdent(imp.Name.Name),
				Sel: nameIdent,
			}
		default: // import . "pkg/path"
			nameExpr = nameIdent
		}

		switch nameObj.(type) {
		case *types.Const:
			// const _ = <value>
			decl.Tok = token.CONST
			spec.Values = []ast.Expr{nameExpr}
		case *types.Var, *types.Func:
			// var _ = <value>
			decl.Tok = token.VAR
			spec.Values = []ast.Expr{nameExpr}
		case *types.TypeName:
			// var _ <type>
			decl.Tok = token.VAR
			spec.Type = nameExpr
		default:
			continue // skip *types.Builtin and others
		}

		// Ensure that types.Info.Uses is up to date.
		tf.info.Uses[nameIdent] = nameObj
		file.Decls = append(file.Decls, decl)
	}
}

// transformGoFile obfuscates the provided Go syntax file.
func (tf *transformer) transformGoFile(file *ast.File) *ast.File {
	// Only obfuscate the literals here if the flag is on
	// and if the package in question is to be obfuscated.
	//
	// We can't obfuscate literals in the runtime and its dependencies,
	// because obfuscated literals sometimes escape to heap,
	// and that's not allowed in the runtime itself.
	if flagLiterals && tf.curPkg.ToObfuscate {
		file = literals.Obfuscate(tf.obfRand, file, tf.info, tf.linkerVariableStrings)

		// some imported constants might not be needed anymore, remove unnecessary imports
		tf.useAllImports(file)
	}

	pre := func(cursor *astutil.Cursor) bool {
		node, ok := cursor.Node().(*ast.Ident)
		if !ok {
			return true
		}
		name := node.Name
		if name == "_" {
			return true // unnamed remains unnamed
		}
		obj := tf.info.ObjectOf(node)
		if obj == nil {
			_, isImplicit := tf.info.Defs[node]
			_, parentIsFile := cursor.Parent().(*ast.File)
			if !isImplicit || parentIsFile {
				// We only care about nil objects in the switch scenario below.
				return true
			}
			// In a type switch like "switch foo := bar.(type) {",
			// "foo" is being declared as a symbolic variable,
			// as it is only actually declared in each "case SomeType:".
			//
			// As such, the symbolic "foo" in the syntax tree has no object,
			// but it is still recorded under Defs with a nil value.
			// We still want to obfuscate that syntax tree identifier,
			// so if we detect the case, create a dummy types.Var for it.
			//
			// Note that "package mypkg" also denotes a nil object in Defs,
			// and we don't want to treat that "mypkg" as a variable,
			// so avoid that case by checking the type of cursor.Parent.
			obj = types.NewVar(node.Pos(), tf.pkg, name, nil)
		}
		pkg := obj.Pkg()
		if vr, ok := obj.(*types.Var); ok && vr.Embedded() {
			// The docs for ObjectOf say:
			//
			//     If id is an embedded struct field, ObjectOf returns the
			//     field (*Var) it defines, not the type (*TypeName) it uses.
			//
			// If this embedded field is a type alias, we want to
			// handle the alias's TypeName instead of treating it as
			// the type the alias points to.
			//
			// Alternatively, if we don't have an alias, we still want to
			// use the embedded type, not the field.
			vrStr := recordedObjectString(vr)
			aliasTypeName, ok := tf.curPkgCache.EmbeddedAliasFields[vrStr]
			if ok {
				aliasScope := tf.pkg.Scope()
				if path := aliasTypeName.PkgPath; path == "" {
					aliasScope = types.Universe
				} else if path != tf.pkg.Path() {
					// If the package is a dependency, import it.
					// We can't grab the package via tf.pkg.Imports,
					// because some of the packages under there are incomplete.
					// ImportFrom will cache complete imports, anyway.
					pkg2, err := tf.origImporter.ImportFrom(path, parentWorkDir, 0)
					if err != nil {
						panic(err)
					}
					aliasScope = pkg2.Scope()
				}
				tname, ok := aliasScope.Lookup(aliasTypeName.Name).(*types.TypeName)
				if !ok {
					panic(fmt.Sprintf("EmbeddedAliasFields pointed %q to a missing type %q", vrStr, aliasTypeName))
				}
				if !tname.IsAlias() {
					panic(fmt.Sprintf("EmbeddedAliasFields pointed %q to a non-alias type %q", vrStr, aliasTypeName))
				}
				obj = tname
			} else {
				named := namedType(obj.Type())
				if named == nil {
					return true // unnamed type (probably a basic type, e.g. int)
				}
				obj = named.Obj()
			}
			pkg = obj.Pkg()
		}
		if pkg == nil {
			return true // universe scope
		}

		// TODO: We match by object name here, which is actually imprecise.
		// For example, in package embed we match the type FS, but we would also
		// match any field or method named FS.
		// Can we instead use an object map like ReflectObjects?
		path := pkg.Path()
		switch path {
		case "sync/atomic", "runtime/internal/atomic":
			if name == "align64" {
				return true
			}
		case "embed":
			// FS is detected by the compiler for //go:embed.
			if name == "FS" {
				return true
			}
		case "reflect":
			switch name {
			// Per the linker's deadcode.go docs,
			// the Method and MethodByName methods are what drive the logic.
			case "Method", "MethodByName":
				return true
			}
		case "crypto/x509/pkix":
			// For better or worse, encoding/asn1 detects a "SET" suffix on slice type names
			// to tell whether those slices should be treated as sets or sequences.
			// Do not obfuscate those names to prevent breaking x509 certificates.
			// TODO: we can surely do better; ideally propose a non-string-based solution
			// upstream, or as a fallback, obfuscate to a name ending with "SET".
			if strings.HasSuffix(name, "SET") {
				return true
			}
		}

		// The package that declared this object did not obfuscate it.
		if usedForReflect(tf.curPkgCache, obj) {
			return true
		}

		lpkg, err := listPackage(tf.curPkg, path)
		if err != nil {
			panic(err) // shouldn't happen
		}
		if !lpkg.ToObfuscate {
			return true // we're not obfuscating this package
		}
		hashToUse := lpkg.GarbleActionID
		debugName := "variable"

		// log.Printf("%s: %#v %T", fset.Position(node.Pos()), node, obj)
		switch obj := obj.(type) {
		case *types.Var:
			if !obj.IsField() {
				// Identifiers denoting variables are always obfuscated.
				break
			}
			debugName = "field"
			// From this point on, we deal with struct fields.

			// Fields don't get hashed with the package's action ID.
			// They get hashed with the type of their parent struct.
			// This is because one struct can be converted to another,
			// as long as the underlying types are identical,
			// even if the structs are defined in different packages.
			//
			// TODO: Consider only doing this for structs where all
			// fields are exported. We only need this special case
			// for cross-package conversions, which can't work if
			// any field is unexported. If that is done, add a test
			// that ensures unexported fields from different
			// packages result in different obfuscated names.
			strct := tf.fieldToStruct[obj]
			if strct == nil {
				panic("could not find struct for field " + name)
			}
			node.Name = hashWithStruct(strct, obj)
			if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
				log.Printf("%s %q hashed with struct fields to %q", debugName, name, node.Name)
			}
			return true

		case *types.TypeName:
			debugName = "type"
		case *types.Func:
			if compilerIntrinsicsFuncs[path+"."+name] {
				return true
			}

			sign := obj.Type().(*types.Signature)
			if sign.Recv() == nil {
				debugName = "func"
			} else {
				debugName = "method"
			}
			if obj.Exported() && sign.Recv() != nil {
				return true // might implement an interface
			}
			switch name {
			case "main", "init", "TestMain":
				return true // don't break them
			}
			if strings.HasPrefix(name, "Test") && isTestSignature(sign) {
				return true // don't break tests
			}
		default:
			return true // we only want to rename the above
		}

		node.Name = hashWithPackage(lpkg, name)
		// TODO: probably move the debugf lines inside the hash funcs
		if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
			log.Printf("%s %q hashed with %x… to %q", debugName, name, hashToUse[:4], node.Name)
		}
		return true
	}
	post := func(cursor *astutil.Cursor) bool {
		imp, ok := cursor.Node().(*ast.ImportSpec)
		if !ok {
			return true
		}
		path, err := strconv.Unquote(imp.Path.Value)
		if err != nil {
			panic(err) // should never happen
		}
		// We're importing an obfuscated package.
		// Replace the import path with its obfuscated version.
		// If the import was unnamed, give it the name of the
		// original package name, to keep references working.
		lpkg, err := listPackage(tf.curPkg, path)
		if err != nil {
			panic(err) // should never happen
		}
		if !lpkg.ToObfuscate {
			return true
		}
		if lpkg.Name != "main" {
			newPath := lpkg.obfuscatedImportPath()
			imp.Path.Value = strconv.Quote(newPath)
		}
		if imp.Name == nil {
			imp.Name = &ast.Ident{
				NamePos: imp.Path.ValuePos, // ensure it ends up on the same line
				Name:    lpkg.Name,
			}
		}
		return true
	}

	return astutil.Apply(file, pre, post).(*ast.File)
}

// named tries to obtain the *types.Named behind a type, if there is one.
// This is useful to obtain "testing.T" from "*testing.T", or to obtain the type
// declaration object from an embedded field.
func namedType(t types.Type) *types.Named {
	switch t := t.(type) {
	case *types.Named:
		return t
	case interface{ Elem() types.Type }:
		return namedType(t.Elem())
	default:
		return nil
	}
}

// isTestSignature returns true if the signature matches "func _(*testing.T)".
func isTestSignature(sign *types.Signature) bool {
	if sign.Recv() != nil {
		return false // test funcs don't have receivers
	}
	params := sign.Params()
	if params.Len() != 1 {
		return false // too many parameters for a test func
	}
	named := namedType(params.At(0).Type())
	if named == nil {
		return false // the only parameter isn't named, like "string"
	}
	obj := named.Obj()
	return obj != nil && obj.Pkg().Path() == "testing" && obj.Name() == "T"
}

func (tf *transformer) transformLink(args []string) ([]string, error) {
	// We can't split by the ".a" extension, because cached object files
	// lack any extension.
	flags, args := splitFlagsFromArgs(args)

	newImportCfg, err := tf.processImportCfg(flags, nil)
	if err != nil {
		return nil, err
	}

	// TODO: unify this logic with the -X handling when using -literals.
	// We should be able to handle both cases via the syntax tree.
	//
	// Make sure -X works with obfuscated identifiers.
	// To cover both obfuscated and non-obfuscated names,
	// duplicate each flag with a obfuscated version.
	flagValueIter(flags, "-X", func(val string) {
		// val is in the form of "foo.com/bar.name=value".
		fullName, stringValue, found := strings.Cut(val, "=")
		if !found {
			return // invalid
		}

		// fullName is "foo.com/bar.name"
		i := strings.LastIndexByte(fullName, '.')
		path, name := fullName[:i], fullName[i+1:]

		// If the package path is "main", it's the current top-level
		// package we are linking.
		// Otherwise, find it in the cache.
		lpkg := tf.curPkg
		if path != "main" {
			lpkg = sharedCache.ListedPackages[path]
		}
		if lpkg == nil {
			// We couldn't find the package.
			// Perhaps a typo, perhaps not part of the build.
			// cmd/link ignores those, so we should too.
			return
		}
		// As before, the main package must remain as "main".
		newPath := path
		if path != "main" {
			newPath = lpkg.obfuscatedImportPath()
		}
		newName := hashWithPackage(lpkg, name)
		flags = append(flags, fmt.Sprintf("-X=%s.%s=%s", newPath, newName, stringValue))
	})

	// Starting in Go 1.17, Go's version is implicitly injected by the linker.
	// It's the same method as -X, so we can override it with an extra flag.
	flags = append(flags, "-X=runtime.buildVersion=unknown")

	// Ensure we strip the -buildid flag, to not leak any build IDs for the
	// link operation or the main package's compilation.
	flags = flagSetValue(flags, "-buildid", "")

	// Strip debug information and symbol tables.
	flags = append(flags, "-w", "-s")

	flags = flagSetValue(flags, "-importcfg", newImportCfg)
	return append(flags, args...), nil
}

func splitFlagsFromArgs(all []string) (flags, args []string) {
	for i := 0; i < len(all); i++ {
		arg := all[i]
		if !strings.HasPrefix(arg, "-") {
			return all[:i:i], all[i:]
		}
		if booleanFlags[arg] || strings.Contains(arg, "=") {
			// Either "-bool" or "-name=value".
			continue
		}
		// "-name value", so the next arg is part of this flag.
		i++
	}
	return all, nil
}

func alterTrimpath(flags []string) []string {
	trimpath := flagValue(flags, "-trimpath")

	// Add our temporary dir to the beginning of -trimpath, so that we don't
	// leak temporary dirs. Needs to be at the beginning, since there may be
	// shorter prefixes later in the list, such as $PWD if TMPDIR=$PWD/tmp.
	return flagSetValue(flags, "-trimpath", sharedTempDir+"=>;"+trimpath)
}

// forwardBuildFlags is obtained from 'go help build' as of Go 1.21.
var forwardBuildFlags = map[string]bool{
	// These shouldn't be used in nested cmd/go calls.
	"-a": false,
	"-n": false,
	"-x": false,
	"-v": false,

	// These are always set by garble.
	"-trimpath": false,
	"-toolexec": false,
	"-buildvcs": false,

	"-C":             true,
	"-asan":          true,
	"-asmflags":      true,
	"-buildmode":     true,
	"-compiler":      true,
	"-cover":         true,
	"-covermode":     true,
	"-coverpkg":      true,
	"-gccgoflags":    true,
	"-gcflags":       true,
	"-installsuffix": true,
	"-ldflags":       true,
	"-linkshared":    true,
	"-mod":           true,
	"-modcacherw":    true,
	"-modfile":       true,
	"-msan":          true,
	"-overlay":       true,
	"-p":             true,
	"-pgo":           true,
	"-pkgdir":        true,
	"-race":          true,
	"-tags":          true,
	"-work":          true,
	"-workfile":      true,
}

// booleanFlags is obtained from 'go help build' and 'go help testflag' as of Go 1.21.
var booleanFlags = map[string]bool{
	// Shared build flags.
	"-a":          true,
	"-asan":       true,
	"-buildvcs":   true,
	"-cover":      true,
	"-i":          true,
	"-linkshared": true,
	"-modcacherw": true,
	"-msan":       true,
	"-n":          true,
	"-race":       true,
	"-trimpath":   true,
	"-v":          true,
	"-work":       true,
	"-x":          true,

	// Test flags (TODO: support its special -args flag)
	"-benchmem": true,
	"-c":        true,
	"-failfast": true,
	"-fullpath": true,
	"-json":     true,
	"-short":    true,
}

func filterForwardBuildFlags(flags []string) (filtered []string, firstUnknown string) {
	for i := 0; i < len(flags); i++ {
		arg := flags[i]
		if strings.HasPrefix(arg, "--") {
			arg = arg[1:] // "--name" to "-name"; keep the short form
		}

		name, _, _ := strings.Cut(arg, "=") // "-name=value" to "-name"

		buildFlag := forwardBuildFlags[name]
		if buildFlag {
			filtered = append(filtered, arg)
		} else {
			firstUnknown = name
		}
		if booleanFlags[arg] || strings.Contains(arg, "=") {
			// Either "-bool" or "-name=value".
			continue
		}
		// "-name value", so the next arg is part of this flag.
		if i++; buildFlag && i < len(flags) {
			filtered = append(filtered, flags[i])
		}
	}
	return filtered, firstUnknown
}

// splitFlagsFromFiles splits args into a list of flag and file arguments. Since
// we can't rely on "--" being present, and we don't parse all flags upfront, we
// rely on finding the first argument that doesn't begin with "-" and that has
// the extension we expect for the list of paths.
//
// This function only makes sense for lower-level tool commands, such as
// "compile" or "link", since their arguments are predictable.
//
// We iterate from the end rather than from the start, to better protect
// oursrelves from flag arguments that may look like paths, such as:
//
//	compile [flags...] -p pkg/path.go [more flags...] file1.go file2.go
//
// For now, since those confusing flags are always followed by more flags,
// iterating in reverse order works around them entirely.
func splitFlagsFromFiles(all []string, ext string) (flags, paths []string) {
	for i := len(all) - 1; i >= 0; i-- {
		arg := all[i]
		if strings.HasPrefix(arg, "-") || !strings.HasSuffix(arg, ext) {
			cutoff := i + 1 // arg is a flag, not a path
			return all[:cutoff:cutoff], all[cutoff:]
		}
	}
	return nil, all
}

// flagValue retrieves the value of a flag such as "-foo", from strings in the
// list of arguments like "-foo=bar" or "-foo" "bar". If the flag is repeated,
// the last value is returned.
func flagValue(flags []string, name string) string {
	lastVal := ""
	flagValueIter(flags, name, func(val string) {
		lastVal = val
	})
	return lastVal
}

// flagValueIter retrieves all the values for a flag such as "-foo", like
// flagValue. The difference is that it allows handling complex flags, such as
// those whose values compose a list.
func flagValueIter(flags []string, name string, fn func(string)) {
	for i, arg := range flags {
		if val := strings.TrimPrefix(arg, name+"="); val != arg {
			// -name=value
			fn(val)
		}
		if arg == name { // -name ...
			if i+1 < len(flags) {
				// -name value
				fn(flags[i+1])
			}
		}
	}
}

func flagSetValue(flags []string, name, value string) []string {
	for i, arg := range flags {
		if strings.HasPrefix(arg, name+"=") {
			// -name=value
			flags[i] = name + "=" + value
			return flags
		}
		if arg == name { // -name ...
			if i+1 < len(flags) {
				// -name value
				flags[i+1] = value
				return flags
			}
			return flags
		}
	}
	return append(flags, name+"="+value)
}

func fetchGoEnv() error {
	out, err := exec.Command("go", "env", "-json",
		// Keep in sync with sharedCache.GoEnv.
		"GOOS", "GOMOD", "GOVERSION", "GOROOT",
	).CombinedOutput()
	if err != nil {
		// TODO: cover this in the tests.
		fmt.Fprintf(os.Stderr, `Can't find the Go toolchain: %v

This is likely due to Go not being installed/setup correctly.

To install Go, see: https://go.dev/doc/install
`, err)
		return errJustExit(1)
	}
	if err := json.Unmarshal(out, &sharedCache.GoEnv); err != nil {
		return fmt.Errorf(`cannot unmarshal from "go env -json": %w`, err)
	}
	sharedCache.GOGARBLE = cmp.Or(os.Getenv("GOGARBLE"), "*") // we default to obfuscating everything
	return nil
}
-												set up an AUTHORS file to attribute copyright

Many files were missing copyright, so also add a short script to add the
missing lines with the current year, and run it.

The AUTHORS file is also self-explanatory. Contributors can add
themselves there, or we can simply update it from time to time via
git-shortlog.

Since we have two scripts now, set up a directory for them.

											
										
										
											4 years ago
+								// Copyright (c) 2019, The Garble Authors.
 								// See LICENSE for licensing information.
-												initial commit

											
										
										
											5 years ago
-												add package godoc

So that pkg.go.dev and `go doc` have something to show.
Copying the first sentence from the README for brevity.

											
										
										
											5 months ago
+								// garble obfuscates Go code by wrapping the Go toolchain.
-												initial commit

											
										
										
											5 years ago
+								package main
 								import (
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+									"bufio"
-												make selection of packages configurable via GOPRIVATE

Carefully select a default that will do the right thing when inside a
module, as well as when building ad-hoc packages.

This means we no longer need to look at the compiler's -std flag, which
is nice.

Also replace foo.com/ with test/, as per golang/go#37641.

Fixes #7.

											
										
										
											4 years ago
+									"bytes"
-												start using some Go 1.22 features

We no longer need to worry about the scope of range variables,
we can iterate over integers directly, and we can use cmp.Or too.

I haven't paid close attention to using these everywhere.
This is mainly testing out the new features where I saw some benefit.

											
										
										
											4 months ago
+									"cmp"
-												avoid using math/rand's global funcs like Seed and Intn

Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:

	Deprecated: Programs that call Seed and then expect a specific sequence
	of results from the global random source (using functions such as Int)
	can be broken when a dependency changes how much it consumes from the
	global random source. To avoid such breakages, programs that need a
	specific result sequence should use NewRand(NewSource(seed)) to obtain a
	random generator that other packages cannot access.

Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.

However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.

Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.

Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.

											
										
										
											1 year ago
+									cryptorand "crypto/rand"
-												start hashing identifiers

											
										
										
											5 years ago
+									"encoding/base64"
-												add seed flag to control how builds are reproducible

Fixes #26.
											
										
										
											4 years ago
+									"encoding/binary"
-												More robust reflection detection

Functions which use reflection on one of their parameters are,
now added to knownReflectAPIs automatically.

This makes most explicit hints for reflection redundant.
Simple protobuf code now works correctly when obfuscated.

Fixes #162
Fixes #373

											
										
										
											3 years ago
+									"encoding/gob"
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+									"encoding/json"
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+									"errors"
-												initial commit

											
										
										
											5 years ago
+									"flag"
 									"fmt"
-												start hashing identifiers

											
										
										
											5 years ago
+									"go/ast"
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+									"go/importer"
-												start hashing identifiers

											
										
										
											5 years ago
+									"go/parser"
 									"go/token"
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+									"go/types"
-												drop Go 1.21 and start using go/version

Needing to awkwardly treat Go versions as if they were semver
is no longer necessary thanks to go/version being in Go 1.22.0 now.

											
										
										
											4 months ago
+									"go/version"
-												start hashing identifiers

											
										
										
											5 years ago
+									"io"
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+									"io/fs"
-												add a bit more docs

											
										
										
											5 years ago
+									"log"
-												add seed flag to control how builds are reproducible

Fixes #26.
											
										
										
											4 years ago
+									mathrand "math/rand"
-												initial commit

											
										
										
											5 years ago
+									"os"
 									"os/exec"
 									"path/filepath"
-												drop support for Go 1.16.x

We can now use pruned module graphs in go.mod files,
and we no longer need to worry about runtime/internal/sys.

Note that I had to update testdata/mod slightly,
as the new pruned module graphs algorithm downloads an extra go.mod file.

This change also paves the way towards future Go 1.18 support.

Thanks to lu4p for cleaning up two TODOs as well.

Co-Authored-By: lu4p <lu4p@pm.me>

											
										
										
											3 years ago
+									"regexp"
-												make the tool work on Windows, enable tests

The tests required a few last tweaks to work on Windows.

											
										
										
											5 years ago
+									"runtime"
-												add a "version" command (#220)

Mimicking "go version", this tells the user garble's own version.

The code is exactly the same that is used for another tool written in
Go, shfmt. It uses runtime/debug to fetch the module version embedded in
binaries built by Go. For example:

	$ go get mvdan.cc/sh/v3/cmd/shfmt@latest
	$ shfmt -version
	v3.2.2
	$ go get mvdan.cc/sh/v3/cmd/shfmt@master
	$ shfmt -version
	v3.3.0-0.dev.0.20210203135509-56c9918c980d

Note that this will not work for a plain "go build" or "go install"
after a "git clone", since in that case the Go tool can't know garble's
own version via go.mod - since it's the current main module:

	$ go build
	$ ./garble version
	(devel)

For the use case of the power user building from source directly, they
are probably clever enough to tell us what git commit they are on, so
this is not a big problem right now. It will also get better once
golang/go#37475 is fixed in the future.

Until then, if we need to do "release" builds locally, we can embed an
explicit version into the binary via ldflags:

	$ go build -ldflags=-X=main.version=v1.2.3
	$ ./garble version
	v1.2.3

Fixes #217.
											
										
										
											3 years ago
+									"runtime/debug"
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									"strconv"
-												start enforcing the link flags -w -s

											
										
										
											5 years ago
+									"strings"
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+									"time"
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
+									"unicode"
 									"unicode/utf8"
-												update dependency versions, drop Go 1.14

Most notably, x/mod now includes the GOPRIVATE pattern-matching API we
were copying before, so we can use it directly.

Also bump the Go version requirement to 1.15, in preparation for the
import path obfuscation PR, and don't let the gotip job fail the entire
workflow.
											
										
										
											4 years ago
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									"github.com/rogpeppe/go-internal/cache"
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+									"golang.org/x/exp/maps"
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+									"golang.org/x/exp/slices"
-												make "garble version" friendlier for devel builds

The proposal at https://go.dev/issue/50603 has been approved,
so Go will at some point start producing module pseudo-versions
even if the main module was built from a VCS clone.

To not wait until a future release like Go 1.20,
implement that ourselves with the help of module.PseudoVersion.

The result is a friendlier version output; what used to be

	$ go install && garble version
	mvdan.cc/garble (devel)

	Build settings:
	[...]

will now look like

	$ go install && garble version
	mvdan.cc/garble v0.0.0-20220505210747-22e3d30216be

	Build settings:
	[...]

Note that we don't use VCS tags in any way, so the prefix is hard-coded
as v0.0.0. That seems fine for development builds, and Go doesn't embed
VCS tag information in binaries anyway.

Finally, note that we start printing the module sum, as it's redundant.
The VCS commit hash, at least in git, should be unique enough.

											
										
										
											2 years ago
+									"golang.org/x/mod/module"
-												follow-up patches to the 'go version' checking (#139)

Give the func a name that tells what the return value means.

Add missing newlines to printfs, use consistent quoting, and replace
"%s" with %q.

Document the Go 1.15 date.

Finally, fix the imports via goimports.
											
										
										
											4 years ago
+									"golang.org/x/tools/go/ast/astutil"
-												 rework reflection detection with ssa (#732)

This is significantly more robust, than the ast based detection and can
record very complex cases of indirect parameter reflection.

Fixes #554

											
										
										
											1 year ago
+									"golang.org/x/tools/go/ssa"
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									"mvdan.cc/garble/internal/ctrlflow"
-												 rework reflection detection with ssa (#732)

This is significantly more robust, than the ast based detection and can
record very complex cases of indirect parameter reflection.

Fixes #554

											
										
										
											1 year ago
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
+									"mvdan.cc/garble/internal/linker"
-												Use XOR instead of AES for literal obfuscation.

Implement a literal obfuscator interface,
to allow the easy addition of new encodings.

Add literal obfuscation for byte literals.

Choose a random obfuscator on literal obfuscation,
useful when multiple obfuscators are implemented.

Fixes #62

											
										
										
											4 years ago
+									"mvdan.cc/garble/internal/literals"
-												initial commit

											
										
										
											5 years ago
+								)
-												make "garble version" include VCS information

When someone builds garble from a git clone,
the resulting binary used to not contain any information:

	$ garble version
	(devel)

Since Go 1.18, VCS information is stamped by default into binaries.
We now print it, alongside any other available build settings:

	$ garble version
	mvdan.cc/garble (devel)

	Build settings:
	       -compiler gc
	     CGO_ENABLED 1
	          GOARCH amd64
	            GOOS linux
	         GOAMD64 v3
	             vcs git
	    vcs.revision 91ea246349544769f5100c29f79cb0f173abfeea
	        vcs.time 2022-03-18T13:45:11Z
	    vcs.modified true

Note that it's still possible for a garble build to contain no useful
version information, such as when built via "go build -buildvcs=false".
However, if a user opts into omitting the information, it's on them to
figure out what version of garble they actually built.

While here, bump test-gotip.

Fixes #491.

											
										
										
											2 years ago
+								var flagSet = flag.NewFlagSet("garble", flag.ContinueOnError)
-												initial commit

											
										
										
											5 years ago
-												allow easy inpection of garbled code

Fixes #17.
											
										
										
											4 years ago
+								var (
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+									flagLiterals bool
 									flagTiny     bool
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+									flagDebug    bool
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+									flagDebugDir string
 									flagSeed     seedFlag
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									// TODO(pagran): in the future, when control flow obfuscation will be stable migrate to flag
 									flagControlFlow = os.Getenv("GARBLE_EXPERIMENTAL_CONTROLFLOW") == "1"
-												allow easy inpection of garbled code

Fixes #17.
											
										
										
											4 years ago
+								)
-												add basic literal obfuscation, starting with strings

Fixes #16.
											
										
										
											4 years ago
 								func init() {
 									flagSet.Usage = usage
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+									flagSet.BoolVar(&flagLiterals, "literals", false, "Obfuscate literals such as strings")
 									flagSet.BoolVar(&flagTiny, "tiny", false, "Optimize for binary size, losing some ability to reverse the process")
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+									flagSet.BoolVar(&flagDebug, "debug", false, "Print debug logs to stderr")
-												use "obfuscate" instead of "garble" in some more places

Mainly comments. "garble" refers to the tool, but the verb and adjective
is more intuitive as "obfuscate" and "obfuscated" instead of "garble"
and "garbled".

											
										
										
											3 years ago
+									flagSet.StringVar(&flagDebugDir, "debugdir", "", "Write the obfuscated source to a directory, e.g. -debugdir=out")
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+									flagSet.Var(&flagSeed, "seed", "Provide a base64-encoded seed, e.g. -seed=o9WDTZ4CN4w\nFor a random seed, provide -seed=random")
 								}
-												avoid capturing groups in regular expressions

We don't use sub-matches captured by these groups,
so avoiding that extra work will save some CPU cycles.
It is likely insignificant compared to the rest of a Go build,
but it's a very easy little win.

											
										
										
											2 years ago
+								var rxGarbleFlag = regexp.MustCompile(`-(?:literals|tiny|debug|debugdir|seed)(?:$|=)`)
-												give a useful error for "garble build -tiny"

We've had confused users a handful of times by now.
And it's reasonable to expect flags to be after the command,
as that's how flags work for cmd/go itself.

I don't think we want to mix our flags with Go's,
or start accepting flags in either place.
Both seem like worse solutions long-term, as they can add confusion.

However, we can quickly give a useful hint when a flag is misplaced.
That should get new users unblocked without asking for help.

We use a regular expression for this purpose,
because it doesn't seem like a FlagSet supports what we need;
to detect whether an argument is one of our flags,
without actually applying its value to the flagset.
Our flagset would also error on Go's flags, which we don't want.

											
										
										
											2 years ago
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+								type seedFlag struct {
 									random bool
 									bytes  []byte
 								}
-												make obfuscation fully deterministic with -seed

The default behavior of garble is to seed via the build inputs,
including the build IDs of the entire Go build of each package.
This works well as a default, and does give us determinism,
but it means that building for different platforms
will result in different obfuscation per platform.

Instead, when -seed is provided, don't use any other hash seed or salt.
This means that a particular Go name will be obfuscated the same way
as long as the seed, package path, and name itself remain constant.

In other words, when the user supplies a custom -seed,
we assume they know what they're doing in terms of storage and rotation.

Expand the README docs with more examples and detail.

Fixes #449.

											
										
										
											2 years ago
+								func (f seedFlag) present() bool { return len(f.bytes) > 0 }
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+								func (f seedFlag) String() string {
 									return base64.RawStdEncoding.EncodeToString(f.bytes)
 								}
 								func (f *seedFlag) Set(s string) error {
 									if s == "random" {
-												print chosen seed when building with -seed=random

The seedFlag.random field had never worked,
as my refactor in December 2021 never set it to true.

Even if the boolean was working, we only printed the random seed
when we failed. It's still useful to see it when a build succeeds,
for example when wanting to reproduce the same binary
or when wanting to reverse a panic from the produced binary.

Add a test this time.

Fixes #696.

											
										
										
											1 year ago
+										f.random = true // to show the random seed we chose
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+										f.bytes = make([]byte, 16) // random 128 bit seed
-												avoid using math/rand's global funcs like Seed and Intn

Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:

	Deprecated: Programs that call Seed and then expect a specific sequence
	of results from the global random source (using functions such as Int)
	can be broken when a dependency changes how much it consumes from the
	global random source. To avoid such breakages, programs that need a
	specific result sequence should use NewRand(NewSource(seed)) to obtain a
	random generator that other packages cannot access.

Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.

However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.

Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.

Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.

											
										
										
											1 year ago
+										if _, err := cryptorand.Read(f.bytes); err != nil {
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+											return fmt.Errorf("error generating random seed: %v", err)
 										}
 									} else {
 										// We expect unpadded base64, but to be nice, accept padded
 										// strings too.
 										s = strings.TrimRight(s, "=")
 										seed, err := base64.RawStdEncoding.DecodeString(s)
 										if err != nil {
 											return fmt.Errorf("error decoding seed: %v", err)
 										}
-												avoid using math/rand's global funcs like Seed and Intn

Go 1.20 is starting to deprecate the use of math/rand's global state,
per https://go.dev/issue/56319 and https://go.dev/issue/20661.
The reasoning is sound:

	Deprecated: Programs that call Seed and then expect a specific sequence
	of results from the global random source (using functions such as Int)
	can be broken when a dependency changes how much it consumes from the
	global random source. To avoid such breakages, programs that need a
	specific result sequence should use NewRand(NewSource(seed)) to obtain a
	random generator that other packages cannot access.

Aside from the tests, we used math/rand only for obfuscating literals,
which caused a deterministic series of calls like Intn. Our call to Seed
was also deterministic, per either GarbleActionID or the -seed flag.

However, our determinism was fragile. If any of our dependencies or
other packages made any calls to math/rand's global funcs, then our
determinism could be broken entirely, and it's hard to notice.

Start using separate math/rand.Rand objects for each use case.
Also make uses of crypto/rand use "cryptorand" for consistency.

Note that this requires a bit of a refactor in internal/literals
to start passing around Rand objects. We also do away with unnecessary
short funcs, especially since math/rand's Read never errors,
and we can obtain a byte via math/rand's Uint32.

											
										
										
											1 year ago
+										// TODO: Note that we always use 8 bytes; any bytes after that are
 										// entirely ignored. That may be confusing to the end user.
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+										if len(seed) < 8 {
 											return fmt.Errorf("-seed needs at least 8 bytes, have %d", len(seed))
 										}
 										f.bytes = seed
 									}
 									return nil
-												add basic literal obfuscation, starting with strings

Fixes #16.
											
										
										
											4 years ago
+								}
-												initial commit

											
										
										
											5 years ago
 								func usage() {
 									fmt.Fprintf(os.Stderr, `
-												all: update the docs a bit

Rework the features section in the README, leaving optional features at
the end of the list. Simplify the caveats list, too; the build cache and
exported field/method bits only need one point each. Overall, the
section was far too wordy for little reason.

Also redo the help text a bit. There's now a line to briefly introduce
the tool, as well as a link to the README with all the details. Finally,
the flags have shorter and more consistent help strings.

While at it, remove two unused global vars as spotted by staticcheck.

											
										
										
											4 years ago
+								Garble obfuscates Go code by wrapping the Go toolchain.
-												initial commit

											
										
										
											5 years ago
-												make the positioning of Go flags clearer in the help text

We did say "garble flags before the command" towards the end,
but let's also make that clearer at the top.

For #342.

											
										
										
											3 years ago
+									garble [garble flags] command [go flags] [go arguments]
-												introduce 'garble build' shortcut

This way, the user doesn't need to remember to use flags like -a and
-trimpath. Also because we might need more 'go build' flags in the
future.

											
										
										
											5 years ago
-												document "garble reverse"

Fixes #5.

											
										
										
											3 years ago
+								For example, to build an obfuscated program:
 									garble build ./cmd/foo
-												give a usage example for combining flags

Users are still filing bugs about this, presumably because the docs
aren't clear enough.

Fixes #372.

											
										
										
											3 years ago
+								Similarly, to combine garble flags and Go build flags:
 									garble -literals build -tags=purego ./cmd/foo
-												document "garble reverse"

Fixes #5.

											
										
										
											3 years ago
+								The following commands are supported:
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
+									build          replace "go build"
 									test           replace "go test"
-												support `garble run`

It's the third time I've wanted it to quickly test out standalone
reproducers when people submit bugs.

Fixes #661.

											
										
										
											1 year ago
+									run            replace "go run"
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
+									reverse        de-obfuscate output such as stack traces
-												clarify that "garble version" does more now (#612)

Since earlier this year, it has also printed build settings.
											
										
										
											2 years ago
+									version        print the version and build settings of the garble binary
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
 								To learn more about a command, run "garble help <command>".
-												fix and re-enable "garble test" (#268)

With the many refactors building up to v0.1.0, we broke "garble test" as
we no longer dealt with test packages well.

Luckily, now that we can depend on TOOLEXEC_IMPORTPATH, we can support
the test command again, as we can always figure out what package we're
currently compiling, without having to track a "main" package.

Note that one major pitfall there is test packages, where
TOOLEXEC_IMPORTPATH does not agree with ImportPath from "go list -json".
However, we can still work around that with a bit of glue code, which is
also copiously documented.

The second change necessary is to consider test packages private
depending on whether their non-test package is private or not. This can
be done via the ForTest field in "go list -json".

The third change is to obfuscate "_testmain.go" files, which are the
code-generated main functions which actually run tests. We used to not
need to obfuscate them, since test function names are never obfuscated
and we used to not obfuscate import paths at compilation time. Now we do
rewrite import paths, so we must do that for "_testmain.go" too.

The fourth change is to re-enable test.txt, and expand it with more
sanity checks and edge cases.

Finally, document "garble test" again.

Fixes #241.
											
										
										
											3 years ago
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+								garble accepts the following flags before a command:
-												all: update the docs a bit

Rework the features section in the README, leaving optional features at
the end of the list. Simplify the caveats list, too; the build cache and
exported field/method bits only need one point each. Overall, the
section was far too wordy for little reason.

Also redo the help text a bit. There's now a line to briefly introduce
the tool, as well as a link to the README with all the details. Finally,
the flags have shorter and more consistent help strings.

While at it, remove two unused global vars as spotted by staticcheck.

											
										
										
											4 years ago
-												initial commit

											
										
										
											5 years ago
+								`[1:])
 									flagSet.PrintDefaults()
-												all: update the docs a bit

Rework the features section in the README, leaving optional features at
the end of the list. Simplify the caveats list, too; the build cache and
exported field/method bits only need one point each. Overall, the
section was far too wordy for little reason.

Also redo the help text a bit. There's now a line to briefly introduce
the tool, as well as a link to the README with all the details. Finally,
the flags have shorter and more consistent help strings.

While at it, remove two unused global vars as spotted by staticcheck.

											
										
										
											4 years ago
+									fmt.Fprintf(os.Stderr, `
-												all: fix links after moving repository (#131)

A couple of places still linked to the personal repo.
											
										
										
											4 years ago
+								For more information, see https://github.com/burrowers/garble.
-												all: update the docs a bit

Rework the features section in the README, leaving optional features at
the end of the list. Simplify the caveats list, too; the build cache and
exported field/method bits only need one point each. Overall, the
section was far too wordy for little reason.

Also redo the help text a bit. There's now a line to briefly introduce
the tool, as well as a link to the README with all the details. Finally,
the flags have shorter and more consistent help strings.

While at it, remove two unused global vars as spotted by staticcheck.

											
										
										
											4 years ago
+								`[1:])
-												initial commit

											
										
										
											5 years ago
+								}
 								func main() { os.Exit(main1()) }
-												start hashing identifiers

											
										
										
											5 years ago
+								var (
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									// Presumably OK to share fset across packages.
 									fset = token.NewFileSet()
-												share a single temporary directory between all processes

Each compile and link sub-process created its own temporary directory,
to be cleaned up shortly after. Moreover, we also had the global
gob-encoded temporary file.

Instead, place all of those under a single, start-to-end temporary
directory. This is cleaner for the end user, and easier to maintain for
us.

A big plus is that we can also get rid of the confusing deferred global,
as it was mostly used to clean up these extra temp dirs. The only
remaining use was post-compile code, which is now an explicit func
returned by each "transform" func.

While at it, clean up the math/rand seeding code a bit and add a debug
log line, and stop shadowing a cmd string with a cmd *exec.Cmd.

Fixes #147.

											
										
										
											3 years ago
+									sharedTempDir = os.Getenv("GARBLE_SHARED")
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+									parentWorkDir = os.Getenv("GARBLE_PARENT_WORK")
-												start hashing identifiers

											
										
										
											5 years ago
+								)
-												error if the user forgot -trimpath

											
										
										
											5 years ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+								const actionGraphFileName = "action-graph.json"
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								type importerWithMap struct {
 									importMap  map[string]string
 									importFrom func(path, dir string, mode types.ImportMode) (*types.Package, error)
 								}
-												wrap types.Importer to canonicalize import paths

The docs for go/importer.ForCompiler say:

	The lookup function is called each time the resulting importer
	needs to resolve an import path. In this mode the importer can
	only be invoked with canonical import paths (not relative or
	absolute ones); it is assumed that the translation to canonical
	import paths is being done by the client of the importer.

We use a lookup func for two reasons: first, to support modules, and
second, to be able to use our information from "go list -json -export".

However, go/types does not canonicalize import paths before calling
ImportFrom. This is somewhat understandable; it doesn't know whether an
importer was created with a lookup func, and ImportFrom only requires
the input path to be canonicalized in that scenario. When the lookup
func is nil, the importer canonicalizes by itself via go/build.Import.

Before this change, the added crossbuild test would fail:

	> garble build net/http
	[stderr]
	# vendor/golang.org/x/crypto/chacha20
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/chacha20/chacha_generic.go:10:2: could not import crypto/cipher (can't find import: "crypto/cipher")
	# vendor/golang.org/x/text/secure/bidirule
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/secure/bidirule/bidirule.go:12:2: could not import errors (can't find import: "errors")
	# vendor/golang.org/x/crypto/cryptobyte
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/cryptobyte/asn1.go:8:16: could not import encoding/asn1 (can't find import: "encoding/asn1")
	# vendor/golang.org/x/text/unicode/norm
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/unicode/norm/composition.go:7:8: could not import unicode/utf8 (can't find import: "unicode/utf8")

This is because we'd fall back to importer.Default, which only knows how
to find packages in $GOROOT/pkg. Those are missing for cross-builds,
unsurprisingly, as those built archives end up in the build cache.

After this change, we properly support importing std-vendored packages,
so we can get rid of the importer.Default workaround. And, by extension,
cross-builds now work as well.

Note that, in the added test script, the full build of the binary fails,
as there seems to be some sort of linker problem:

	> garble build
	[stderr]
	# test/main
	d9rqJyxo.uoqIiDs5: relocation target runtime.os9A16A3 not defined

We leave that as a TODO for now, as this change is subtle enough as it
is.

											
										
										
											3 years ago
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func (im importerWithMap) Import(path string) (*types.Package, error) {
-												wrap types.Importer to canonicalize import paths

The docs for go/importer.ForCompiler say:

	The lookup function is called each time the resulting importer
	needs to resolve an import path. In this mode the importer can
	only be invoked with canonical import paths (not relative or
	absolute ones); it is assumed that the translation to canonical
	import paths is being done by the client of the importer.

We use a lookup func for two reasons: first, to support modules, and
second, to be able to use our information from "go list -json -export".

However, go/types does not canonicalize import paths before calling
ImportFrom. This is somewhat understandable; it doesn't know whether an
importer was created with a lookup func, and ImportFrom only requires
the input path to be canonicalized in that scenario. When the lookup
func is nil, the importer canonicalizes by itself via go/build.Import.

Before this change, the added crossbuild test would fail:

	> garble build net/http
	[stderr]
	# vendor/golang.org/x/crypto/chacha20
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/chacha20/chacha_generic.go:10:2: could not import crypto/cipher (can't find import: "crypto/cipher")
	# vendor/golang.org/x/text/secure/bidirule
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/secure/bidirule/bidirule.go:12:2: could not import errors (can't find import: "errors")
	# vendor/golang.org/x/crypto/cryptobyte
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/cryptobyte/asn1.go:8:16: could not import encoding/asn1 (can't find import: "encoding/asn1")
	# vendor/golang.org/x/text/unicode/norm
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/unicode/norm/composition.go:7:8: could not import unicode/utf8 (can't find import: "unicode/utf8")

This is because we'd fall back to importer.Default, which only knows how
to find packages in $GOROOT/pkg. Those are missing for cross-builds,
unsurprisingly, as those built archives end up in the build cache.

After this change, we properly support importing std-vendored packages,
so we can get rid of the importer.Default workaround. And, by extension,
cross-builds now work as well.

Note that, in the added test script, the full build of the binary fails,
as there seems to be some sort of linker problem:

	> garble build
	[stderr]
	# test/main
	d9rqJyxo.uoqIiDs5: relocation target runtime.os9A16A3 not defined

We leave that as a TODO for now, as this change is subtle enough as it
is.

											
										
										
											3 years ago
+									panic("should never be called")
 								}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func (im importerWithMap) ImportFrom(path, dir string, mode types.ImportMode) (*types.Package, error) {
 									if path2 := im.importMap[path]; path2 != "" {
-												wrap types.Importer to canonicalize import paths

The docs for go/importer.ForCompiler say:

	The lookup function is called each time the resulting importer
	needs to resolve an import path. In this mode the importer can
	only be invoked with canonical import paths (not relative or
	absolute ones); it is assumed that the translation to canonical
	import paths is being done by the client of the importer.

We use a lookup func for two reasons: first, to support modules, and
second, to be able to use our information from "go list -json -export".

However, go/types does not canonicalize import paths before calling
ImportFrom. This is somewhat understandable; it doesn't know whether an
importer was created with a lookup func, and ImportFrom only requires
the input path to be canonicalized in that scenario. When the lookup
func is nil, the importer canonicalizes by itself via go/build.Import.

Before this change, the added crossbuild test would fail:

	> garble build net/http
	[stderr]
	# vendor/golang.org/x/crypto/chacha20
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/chacha20/chacha_generic.go:10:2: could not import crypto/cipher (can't find import: "crypto/cipher")
	# vendor/golang.org/x/text/secure/bidirule
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/secure/bidirule/bidirule.go:12:2: could not import errors (can't find import: "errors")
	# vendor/golang.org/x/crypto/cryptobyte
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/cryptobyte/asn1.go:8:16: could not import encoding/asn1 (can't find import: "encoding/asn1")
	# vendor/golang.org/x/text/unicode/norm
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/unicode/norm/composition.go:7:8: could not import unicode/utf8 (can't find import: "unicode/utf8")

This is because we'd fall back to importer.Default, which only knows how
to find packages in $GOROOT/pkg. Those are missing for cross-builds,
unsurprisingly, as those built archives end up in the build cache.

After this change, we properly support importing std-vendored packages,
so we can get rid of the importer.Default workaround. And, by extension,
cross-builds now work as well.

Note that, in the added test script, the full build of the binary fails,
as there seems to be some sort of linker problem:

	> garble build
	[stderr]
	# test/main
	d9rqJyxo.uoqIiDs5: relocation target runtime.os9A16A3 not defined

We leave that as a TODO for now, as this change is subtle enough as it
is.

											
										
										
											3 years ago
+										path = path2
 									}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									return im.importFrom(path, dir, mode)
 								}
 								func importerForPkg(lpkg *listedPackage) importerWithMap {
 									return importerWithMap{
 										importFrom: importer.ForCompiler(fset, "gc", func(path string) (io.ReadCloser, error) {
 											pkg, err := listPackage(lpkg, path)
 											if err != nil {
 												return nil, err
 											}
 											return os.Open(pkg.Export)
 										}).(types.ImporterFrom).ImportFrom,
 										importMap: lpkg.ImportMap,
 									}
-												wrap types.Importer to canonicalize import paths

The docs for go/importer.ForCompiler say:

	The lookup function is called each time the resulting importer
	needs to resolve an import path. In this mode the importer can
	only be invoked with canonical import paths (not relative or
	absolute ones); it is assumed that the translation to canonical
	import paths is being done by the client of the importer.

We use a lookup func for two reasons: first, to support modules, and
second, to be able to use our information from "go list -json -export".

However, go/types does not canonicalize import paths before calling
ImportFrom. This is somewhat understandable; it doesn't know whether an
importer was created with a lookup func, and ImportFrom only requires
the input path to be canonicalized in that scenario. When the lookup
func is nil, the importer canonicalizes by itself via go/build.Import.

Before this change, the added crossbuild test would fail:

	> garble build net/http
	[stderr]
	# vendor/golang.org/x/crypto/chacha20
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/chacha20/chacha_generic.go:10:2: could not import crypto/cipher (can't find import: "crypto/cipher")
	# vendor/golang.org/x/text/secure/bidirule
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/secure/bidirule/bidirule.go:12:2: could not import errors (can't find import: "errors")
	# vendor/golang.org/x/crypto/cryptobyte
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/crypto/cryptobyte/asn1.go:8:16: could not import encoding/asn1 (can't find import: "encoding/asn1")
	# vendor/golang.org/x/text/unicode/norm
	typecheck error: /usr/lib/go/src/vendor/golang.org/x/text/unicode/norm/composition.go:7:8: could not import unicode/utf8 (can't find import: "unicode/utf8")

This is because we'd fall back to importer.Default, which only knows how
to find packages in $GOROOT/pkg. Those are missing for cross-builds,
unsurprisingly, as those built archives end up in the build cache.

After this change, we properly support importing std-vendored packages,
so we can get rid of the importer.Default workaround. And, by extension,
cross-builds now work as well.

Note that, in the added test script, the full build of the binary fails,
as there seems to be some sort of linker problem:

	> garble build
	[stderr]
	# test/main
	d9rqJyxo.uoqIiDs5: relocation target runtime.os9A16A3 not defined

We leave that as a TODO for now, as this change is subtle enough as it
is.

											
										
										
											3 years ago
+								}
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+								// uniqueLineWriter sits underneath log.SetOutput to deduplicate log lines.
 								// We log bits of useful information for debugging,
 								// and logging the same detail twice is not going to help the user.
-												add a few TODOs with uncovered code that should not be

This will hopefully give new contributors a place to start.
Some of these, like verifying that the "help" commands work,
will be relatively simple additions to our test scripts.

											
										
										
											2 years ago
+								// Duplicates are relatively normal, given that names tend to repeat.
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+								type uniqueLineWriter struct {
 									out  io.Writer
 									seen map[string]bool
 								}
 								func (w *uniqueLineWriter) Write(p []byte) (n int, err error) {
 									if !flagDebug {
 										panic("unexpected use of uniqueLineWriter with -debug unset")
 									}
 									if bytes.Count(p, []byte("\n")) != 1 {
-												don't panic when we can error as easily

Panicking in small helpers or in funcs that don't return error
has proved useful to keep code easier to maintain,
particularly for cases that should typically never happen.

However, in these cases we can error just as easily.
In particular, I was getting a panic whenever I forgot
that I was running garble with Go master (1.23), which is over the top.

											
										
										
											4 months ago
+										return 0, fmt.Errorf("log write wasn't just one line: %q", p)
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+									}
 									if w.seen[string(p)] {
 										return len(p), nil
 									}
 									if w.seen == nil {
 										w.seen = make(map[string]bool)
 									}
 									w.seen[string(p)] = true
 									return w.out.Write(p)
 								}
 								// debugSince is like time.Since but resulting in shorter output.
 								// A build process takes at least hundreds of milliseconds,
 								// so extra decimal points in the order of microseconds aren't meaningful.
 								func debugSince(start time.Time) time.Duration {
 									return time.Since(start).Truncate(10 * time.Microsecond)
 								}
-												initial commit

											
										
										
											5 years ago
+								func main1() int {
-												start reporting total allocs by garble in the benchmark

This is like allocs/op by testing.B.ReportAllocs,
but it combines all allocations by garble sub-processes.
As we currently generate quite a bit of garbage,
and reductions in it may only reduce time/op very slowly,
this new metric will help us visualize small improvements.

The regular ReportAllocs would not help us at all,
as the main process simply executes "garble build".

We remove user-ns/op to make space for mallocs/op,
and also since it's a bit redundant given sys-ns/op and time-ns/op.

	name      time/op
	Build-16      9.20s ± 1%

	name      bin-B
	Build-16      5.16M ± 0%

	name      cached-time/op
	Build-16      304ms ± 4%

	name      mallocs/op
	Build-16      30.7M ± 0%

	name      sys-time/op
	Build-16      4.78s ± 4%

											
										
										
											2 years ago
+									defer func() {
 										if os.Getenv("GARBLE_WRITE_ALLOCS") != "true" {
 											return
 										}
 										var memStats runtime.MemStats
 										runtime.ReadMemStats(&memStats)
 										fmt.Fprintf(os.Stderr, "garble allocs: %d\n", memStats.Mallocs)
 									}()
-												initial commit

											
										
										
											5 years ago
+									if err := flagSet.Parse(os.Args[1:]); err != nil {
 										return 2
 									}
-												add a bit more docs

											
										
										
											5 years ago
+									log.SetPrefix("[garble] ")
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+									log.SetFlags(0) // no timestamps, as they aren't very useful
 									if flagDebug {
-												add a few TODOs with uncovered code that should not be

This will hopefully give new contributors a place to start.
Some of these, like verifying that the "help" commands work,
will be relatively simple additions to our test scripts.

											
										
										
											2 years ago
+										// TODO: cover this in the tests.
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+										log.SetOutput(&uniqueLineWriter{out: os.Stderr})
 									} else {
 										log.SetOutput(io.Discard)
 									}
-												initial commit

											
										
										
											5 years ago
+									args := flagSet.Args()
 									if len(args) < 1 {
-												prevent exiting early with early errors (#205)

The point of main1 returning an int is that testscript can run code
afterwards, such as to collect coverage information when running with
-coverprofile.

We were using plain os.Exit in a couple of places: when help was
requested, and when the Go version could not be fetched.

In those cases, return an error to main1, and let it do the right thing.

For #35.
											
										
										
											3 years ago
+										usage()
 										return 2
-												initial commit

											
										
										
											5 years ago
+									}
-												print chosen seed when building with -seed=random

The seedFlag.random field had never worked,
as my refactor in December 2021 never set it to true.

Even if the boolean was working, we only printed the random seed
when we failed. It's still useful to see it when a build succeeds,
for example when wanting to reproduce the same binary
or when wanting to reverse a panic from the produced binary.

Add a test this time.

Fixes #696.

											
										
										
											1 year ago
 									// If a random seed was used, the user won't be able to reproduce the
 									// same output or failure unless we print the random seed we chose.
 									// If the build failed and a random seed was used,
 									// the failure might not reproduce with a different seed.
 									// Print it before we exit.
 									if flagSeed.random {
 										fmt.Fprintf(os.Stderr, "-seed chosen at random: %s\n", base64.RawStdEncoding.EncodeToString(flagSeed.bytes))
 									}
-												split main1 with a func returning an error

											
										
										
											5 years ago
+									if err := mainErr(args); err != nil {
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+										if code, ok := err.(errJustExit); ok {
-												make early errors count towards code coverage

I recently added TODOs for bits of code we should cover in the tests.
I was looking at that again just now, and was puzzled;
we do indeed have test cases for many of them already.

We just weren't counting them towards code coverage due to a bug.
errJustExit works as expected, except that it calls os.Exit directly,
whereas testscript wants a non-zero return to run its "after" code.
Part of that code is what handles joining code coverage files.

The total code coverage jumps from 86.2% to 87.6%.

											
										
										
											2 years ago
+											return int(code)
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+										}
 										fmt.Fprintln(os.Stderr, err)
-												split main1 with a func returning an error

											
										
										
											5 years ago
+										return 1
 									}
 									return 0
 								}
-												introduce 'garble build' shortcut

This way, the user doesn't need to remember to use flags like -a and
-trimpath. Also because we might need more 'go build' flags in the
future.

											
										
										
											5 years ago
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+								type errJustExit int
 								func (e errJustExit) Error() string { return fmt.Sprintf("exit: %d", e) }
-												prevent exiting early with early errors (#205)

The point of main1 returning an int is that testscript can run code
afterwards, such as to collect coverage information when running with
-coverprofile.

We were using plain os.Exit in a couple of places: when help was
requested, and when the Go version could not be fetched.

In those cases, return an error to main1, and let it do the right thing.

For #35.
											
										
										
											3 years ago
-												follow-up patches to the 'go version' checking (#139)

Give the func a name that tells what the return value means.

Add missing newlines to printfs, use consistent quoting, and replace
"%s" with %q.

Document the Go 1.15 date.

Finally, fix the imports via goimports.
											
										
										
											4 years ago
+								func goVersionOK() bool {
-												fail early if we know we lack Go linker patches

Right now, we only have linker patches for Go 1.22.x.
We only ever maintain those for one or two major Go versions at a time.

If a user tries to use the Go toolchain from 1.21, we already fail
with "Go version too old" messages early on, but we don't for 1.23,
causing a relatively confusing error later on when we link a binary:

    cannot get modified linker: cannot retrieve linker patches: open patches/go1.23: file does not exist

Instead, fail early and with a good error message.

											
										
										
											4 months ago
+									const (
 										minGoVersion = "go1.22" // the first major version we support
 										maxGoVersion = "go1.23" // the first major version we don't support
 									)
-												Add go version validation (#136)

Fixes https://github.com/burrowers/garble/issues/121
											
										
										
											4 years ago
-												drop Go 1.21 and start using go/version

Needing to awkwardly treat Go versions as if they were semver
is no longer necessary thanks to go/version being in Go 1.22.0 now.

											
										
										
											4 months ago
+									// rxVersion looks for a version like "go1.2" or "go1.2.3" in `go env GOVERSION`.
-												avoid capturing groups in regular expressions

We don't use sub-matches captured by these groups,
so avoiding that extra work will save some CPU cycles.
It is likely insignificant compared to the rest of a Go build,
but it's a very easy little win.

											
										
										
											2 years ago
+									rxVersion := regexp.MustCompile(`go\d+\.\d+(?:\.\d+)?`)
-												fail if the current Go version is newer than what built garble

For instance, Go 1.18 added support for generics, so its compiler output
files changed format to accomodate for the new language feature.
If garble is built with Go 1.17 and then used to perform builds on Go
1.18, it will fail in a very confusing way, because garble's go/types
and go/importer packages will not know how to deal with that.

As already discussed in #269, require the version that built the garble
binary to be equal or newer. In that thread we discussed only comparing
the major version, so for example garble built on go1.18 could be used
on the toolchain go1.18.5. However, that could still fail in confusing
ways if a fix to go/types or go/importer happened in a point release.

While here, I noticed that we were still using Go 1.17 for some CI
checks. Fix that, except for staticcheck.

Fixes #269.

											
										
										
											2 years ago
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									toolchainVersionFull := sharedCache.GoEnv.GOVERSION
-												drop Go 1.21 and start using go/version

Needing to awkwardly treat Go versions as if they were semver
is no longer necessary thanks to go/version being in Go 1.22.0 now.

											
										
										
											4 months ago
+									sharedCache.GoVersion = rxVersion.FindString(toolchainVersionFull)
 									if sharedCache.GoVersion == "" {
 										// Go 1.15.x and older did not have GOVERSION yet; they are too old anyway.
 										fmt.Fprintf(os.Stderr, "Go version is too old; please upgrade to %s or newer\n", minGoVersion)
-												Add go version validation (#136)

Fixes https://github.com/burrowers/garble/issues/121
											
										
										
											4 years ago
+										return false
 									}
-												drop Go 1.21 and start using go/version

Needing to awkwardly treat Go versions as if they were semver
is no longer necessary thanks to go/version being in Go 1.22.0 now.

											
										
										
											4 months ago
+									if version.Compare(sharedCache.GoVersion, minGoVersion) < 0 {
 										fmt.Fprintf(os.Stderr, "Go version %q is too old; please upgrade to %s or newer\n", toolchainVersionFull, minGoVersion)
-												fail if the current Go version is newer than what built garble

For instance, Go 1.18 added support for generics, so its compiler output
files changed format to accomodate for the new language feature.
If garble is built with Go 1.17 and then used to perform builds on Go
1.18, it will fail in a very confusing way, because garble's go/types
and go/importer packages will not know how to deal with that.

As already discussed in #269, require the version that built the garble
binary to be equal or newer. In that thread we discussed only comparing
the major version, so for example garble built on go1.18 could be used
on the toolchain go1.18.5. However, that could still fail in confusing
ways if a fix to go/types or go/importer happened in a point release.

While here, I noticed that we were still using Go 1.17 for some CI
checks. Fix that, except for staticcheck.

Fixes #269.

											
										
										
											2 years ago
+										return false
 									}
-												fail early if we know we lack Go linker patches

Right now, we only have linker patches for Go 1.22.x.
We only ever maintain those for one or two major Go versions at a time.

If a user tries to use the Go toolchain from 1.21, we already fail
with "Go version too old" messages early on, but we don't for 1.23,
causing a relatively confusing error later on when we link a binary:

    cannot get modified linker: cannot retrieve linker patches: open patches/go1.23: file does not exist

Instead, fail early and with a good error message.

											
										
										
											4 months ago
+									if version.Compare(sharedCache.GoVersion, maxGoVersion) >= 0 {
 										fmt.Fprintf(os.Stderr, "Go version %q is too new; Go linker patches aren't available for %s or later yet\n", toolchainVersionFull, maxGoVersion)
 										return false
 									}
-												fail if the current Go version is newer than what built garble

For instance, Go 1.18 added support for generics, so its compiler output
files changed format to accomodate for the new language feature.
If garble is built with Go 1.17 and then used to perform builds on Go
1.18, it will fail in a very confusing way, because garble's go/types
and go/importer packages will not know how to deal with that.

As already discussed in #269, require the version that built the garble
binary to be equal or newer. In that thread we discussed only comparing
the major version, so for example garble built on go1.18 could be used
on the toolchain go1.18.5. However, that could still fail in confusing
ways if a fix to go/types or go/importer happened in a point release.

While here, I noticed that we were still using Go 1.17 for some CI
checks. Fix that, except for staticcheck.

Fixes #269.

											
										
										
											2 years ago
 									// Ensure that the version of Go that built the garble binary is equal or
-												update gotip in CI and fix -tiny on the latest tip

printOneCgoTraceback now returns a boolean rather than an int.
Since we need to have different logic based on the Go version,
and toolchainVersionSemver was only set for the main process,
move the string to the shared cache global.
This is a nice thing to do anyway, to reduce the number of globals.

While here, update actions/setup-go to v4, which starts caching
GOMODCACHE and GOCACHE by default now.
Disable it, because it still doesn't help in our case,
and GitHub's Actions caching is still really inefficient.

And update staticcheck too.

											
										
										
											1 year ago
+									// newer than cache.GoVersionSemver.
-												start using some Go 1.22 features

We no longer need to worry about the scope of range variables,
we can iterate over integers directly, and we can use cmp.Or too.

I haven't paid close attention to using these everywhere.
This is mainly testing out the new features where I saw some benefit.

											
										
										
											4 months ago
+									builtVersionFull := cmp.Or(os.Getenv("GARBLE_TEST_GOVERSION"), runtime.Version())
-												fail if the current Go version is newer than what built garble

For instance, Go 1.18 added support for generics, so its compiler output
files changed format to accomodate for the new language feature.
If garble is built with Go 1.17 and then used to perform builds on Go
1.18, it will fail in a very confusing way, because garble's go/types
and go/importer packages will not know how to deal with that.

As already discussed in #269, require the version that built the garble
binary to be equal or newer. In that thread we discussed only comparing
the major version, so for example garble built on go1.18 could be used
on the toolchain go1.18.5. However, that could still fail in confusing
ways if a fix to go/types or go/importer happened in a point release.

While here, I noticed that we were still using Go 1.17 for some CI
checks. Fix that, except for staticcheck.

Fixes #269.

											
										
										
											2 years ago
+									builtVersion := rxVersion.FindString(builtVersionFull)
 									if builtVersion == "" {
 										// If garble built itself, we don't know what Go version was used.
 										// Fall back to not performing the check against the toolchain version.
 										return true
 									}
-												drop Go 1.21 and start using go/version

Needing to awkwardly treat Go versions as if they were semver
is no longer necessary thanks to go/version being in Go 1.22.0 now.

											
										
										
											4 months ago
+									if version.Compare(builtVersion, sharedCache.GoVersion) < 0 {
-												suggest a command when asking the user to rebuild garble

See a user's apparent confusion in #738.

											
										
										
											1 year ago
+										fmt.Fprintf(os.Stderr, `
-												drop Go 1.21 and start using go/version

Needing to awkwardly treat Go versions as if they were semver
is no longer necessary thanks to go/version being in Go 1.22.0 now.

											
										
										
											4 months ago
+								garble was built with %q and can't be used with the newer %q; rebuild it with a command like:
-												Use go install instead of garble install

garble install command does not exist

											
										
										
											9 months ago
+								    go install mvdan.cc/garble@latest
-												suggest a command when asking the user to rebuild garble

See a user's apparent confusion in #738.

											
										
										
											1 year ago
+								`[1:], builtVersionFull, toolchainVersionFull)
-												Add go version validation (#136)

Fixes https://github.com/burrowers/garble/issues/121
											
										
										
											4 years ago
+										return false
 									}
 									return true
 								}
-												split main1 with a func returning an error

											
										
										
											5 years ago
+								func mainErr(args []string) error {
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+									command, args := args[0], args[1:]
 									// Catch users reaching for `go build -toolexec=garble`.
 									if command != "toolexec" && len(args) == 1 && args[0] == "-V=full" {
 										return fmt.Errorf(`did you run "go [command] -toolexec=garble" instead of "garble [command]"?`)
 									}
 									switch command {
-												clarify usage text, add help flags

Also remove the -toolexec equivalent, as it's becoming longer now that
we have GARBLE_DIR, and it might become out of date in the future again.
We don't want users to assume it will work forever.

											
										
										
											4 years ago
+									case "help":
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
+										if hasHelpFlag(args) || len(args) > 1 {
 											fmt.Fprintf(os.Stderr, "usage: garble help [command]\n")
 											return errJustExit(2)
 										}
 										if len(args) == 1 {
 											return mainErr([]string{args[0], "-h"})
-												make "help" refuse arguments for now

Since they are otherwise silently ignored.

											
										
										
											3 years ago
+										}
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+										usage()
 										return errJustExit(2)
-												add a "version" command (#220)

Mimicking "go version", this tells the user garble's own version.

The code is exactly the same that is used for another tool written in
Go, shfmt. It uses runtime/debug to fetch the module version embedded in
binaries built by Go. For example:

	$ go get mvdan.cc/sh/v3/cmd/shfmt@latest
	$ shfmt -version
	v3.2.2
	$ go get mvdan.cc/sh/v3/cmd/shfmt@master
	$ shfmt -version
	v3.3.0-0.dev.0.20210203135509-56c9918c980d

Note that this will not work for a plain "go build" or "go install"
after a "git clone", since in that case the Go tool can't know garble's
own version via go.mod - since it's the current main module:

	$ go build
	$ ./garble version
	(devel)

For the use case of the power user building from source directly, they
are probably clever enough to tell us what git commit they are on, so
this is not a big problem right now. It will also get better once
golang/go#37475 is fixed in the future.

Until then, if we need to do "release" builds locally, we can embed an
explicit version into the binary via ldflags:

	$ go build -ldflags=-X=main.version=v1.2.3
	$ ./garble version
	v1.2.3

Fixes #217.
											
										
										
											3 years ago
+									case "version":
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
+										if hasHelpFlag(args) || len(args) > 0 {
 											fmt.Fprintf(os.Stderr, "usage: garble version\n")
 											return errJustExit(2)
-												include a "garble version" test (#221)

Not sure why the last commit came with none.

While at it, I noticed that the version command would ignore arguments.
Error instead.
											
										
										
											3 years ago
+										}
-												make "garble version" include VCS information

When someone builds garble from a git clone,
the resulting binary used to not contain any information:

	$ garble version
	(devel)

Since Go 1.18, VCS information is stamped by default into binaries.
We now print it, alongside any other available build settings:

	$ garble version
	mvdan.cc/garble (devel)

	Build settings:
	       -compiler gc
	     CGO_ENABLED 1
	          GOARCH amd64
	            GOOS linux
	         GOAMD64 v3
	             vcs git
	    vcs.revision 91ea246349544769f5100c29f79cb0f173abfeea
	        vcs.time 2022-03-18T13:45:11Z
	    vcs.modified true

Note that it's still possible for a garble build to contain no useful
version information, such as when built via "go build -buildvcs=false".
However, if a user opts into omitting the information, it's on them to
figure out what version of garble they actually built.

While here, bump test-gotip.

Fixes #491.

											
										
										
											2 years ago
+										info, ok := debug.ReadBuildInfo()
 										if !ok {
 											// The build binary was stripped of build info?
 											// Could be the case if garble built itself.
 											fmt.Println("unknown")
 											return nil
 										}
 										mod := &info.Main
 										if mod.Replace != nil {
 											mod = mod.Replace
 										}
 										// For the tests.
-												go 1.18.x now sets -buildvcs=false for `go test`

That is, since Go 1.18.1, released back in April 2022.
We no longer need to worry about the buggy Go 1.18.0.

While here, use a clearer env var name; the settings are build settings.

											
										
										
											2 years ago
+										if v := os.Getenv("GARBLE_TEST_BUILDSETTINGS"); v != "" {
-												make "garble version" include VCS information

When someone builds garble from a git clone,
the resulting binary used to not contain any information:

	$ garble version
	(devel)

Since Go 1.18, VCS information is stamped by default into binaries.
We now print it, alongside any other available build settings:

	$ garble version
	mvdan.cc/garble (devel)

	Build settings:
	       -compiler gc
	     CGO_ENABLED 1
	          GOARCH amd64
	            GOOS linux
	         GOAMD64 v3
	             vcs git
	    vcs.revision 91ea246349544769f5100c29f79cb0f173abfeea
	        vcs.time 2022-03-18T13:45:11Z
	    vcs.modified true

Note that it's still possible for a garble build to contain no useful
version information, such as when built via "go build -buildvcs=false".
However, if a user opts into omitting the information, it's on them to
figure out what version of garble they actually built.

While here, bump test-gotip.

Fixes #491.

											
										
										
											2 years ago
+											var extra []debug.BuildSetting
 											if err := json.Unmarshal([]byte(v), &extra); err != nil {
 												return err
 											}
 											info.Settings = append(info.Settings, extra...)
 										}
-												make "garble version" friendlier for devel builds

The proposal at https://go.dev/issue/50603 has been approved,
so Go will at some point start producing module pseudo-versions
even if the main module was built from a VCS clone.

To not wait until a future release like Go 1.20,
implement that ourselves with the help of module.PseudoVersion.

The result is a friendlier version output; what used to be

	$ go install && garble version
	mvdan.cc/garble (devel)

	Build settings:
	[...]

will now look like

	$ go install && garble version
	mvdan.cc/garble v0.0.0-20220505210747-22e3d30216be

	Build settings:
	[...]

Note that we don't use VCS tags in any way, so the prefix is hard-coded
as v0.0.0. That seems fine for development builds, and Go doesn't embed
VCS tag information in binaries anyway.

Finally, note that we start printing the module sum, as it's redundant.
The VCS commit hash, at least in git, should be unique enough.

											
										
										
											2 years ago
+										// Until https://github.com/golang/go/issues/50603 is implemented,
 										// manually construct something like a pseudo-version.
-												minor tweaks in preparation for Go 1.21

Update CI to use a newer version of Go master,
now that we're already getting release candidates.

Look at the diffs between Go 1.20 and master of `go help build`
and `go help testflag`, and add two flags that were recently added.

While here, bump a hopeful TODO for a feature request,
since that one definitely did not happen for 1.21.

											
										
										
											11 months ago
+										// TODO: remove when this code is dead, hopefully in Go 1.22.
-												make "garble version" friendlier for devel builds

The proposal at https://go.dev/issue/50603 has been approved,
so Go will at some point start producing module pseudo-versions
even if the main module was built from a VCS clone.

To not wait until a future release like Go 1.20,
implement that ourselves with the help of module.PseudoVersion.

The result is a friendlier version output; what used to be

	$ go install && garble version
	mvdan.cc/garble (devel)

	Build settings:
	[...]

will now look like

	$ go install && garble version
	mvdan.cc/garble v0.0.0-20220505210747-22e3d30216be

	Build settings:
	[...]

Note that we don't use VCS tags in any way, so the prefix is hard-coded
as v0.0.0. That seems fine for development builds, and Go doesn't embed
VCS tag information in binaries anyway.

Finally, note that we start printing the module sum, as it's redundant.
The VCS commit hash, at least in git, should be unique enough.

											
										
										
											2 years ago
+										if mod.Version == "(devel)" {
 											var vcsTime time.Time
 											var vcsRevision string
 											for _, setting := range info.Settings {
 												switch setting.Key {
 												case "vcs.time":
 													// If the format is invalid, we'll print a zero timestamp.
 													vcsTime, _ = time.Parse(time.RFC3339Nano, setting.Value)
 												case "vcs.revision":
 													vcsRevision = setting.Value
 													if len(vcsRevision) > 12 {
 														vcsRevision = vcsRevision[:12]
 													}
 												}
 											}
 											if vcsRevision != "" {
 												mod.Version = module.PseudoVersion("", "", vcsTime, vcsRevision)
 											}
 										}
 										fmt.Printf("%s %s\n\n", mod.Path, mod.Version)
-												make "garble version" include VCS information

When someone builds garble from a git clone,
the resulting binary used to not contain any information:

	$ garble version
	(devel)

Since Go 1.18, VCS information is stamped by default into binaries.
We now print it, alongside any other available build settings:

	$ garble version
	mvdan.cc/garble (devel)

	Build settings:
	       -compiler gc
	     CGO_ENABLED 1
	          GOARCH amd64
	            GOOS linux
	         GOAMD64 v3
	             vcs git
	    vcs.revision 91ea246349544769f5100c29f79cb0f173abfeea
	        vcs.time 2022-03-18T13:45:11Z
	    vcs.modified true

Note that it's still possible for a garble build to contain no useful
version information, such as when built via "go build -buildvcs=false".
However, if a user opts into omitting the information, it's on them to
figure out what version of garble they actually built.

While here, bump test-gotip.

Fixes #491.

											
										
										
											2 years ago
+										fmt.Printf("Build settings:\n")
 										for _, setting := range info.Settings {
 											if setting.Value == "" {
 												continue // do empty build settings even matter?
-												add a "version" command (#220)

Mimicking "go version", this tells the user garble's own version.

The code is exactly the same that is used for another tool written in
Go, shfmt. It uses runtime/debug to fetch the module version embedded in
binaries built by Go. For example:

	$ go get mvdan.cc/sh/v3/cmd/shfmt@latest
	$ shfmt -version
	v3.2.2
	$ go get mvdan.cc/sh/v3/cmd/shfmt@master
	$ shfmt -version
	v3.3.0-0.dev.0.20210203135509-56c9918c980d

Note that this will not work for a plain "go build" or "go install"
after a "git clone", since in that case the Go tool can't know garble's
own version via go.mod - since it's the current main module:

	$ go build
	$ ./garble version
	(devel)

For the use case of the power user building from source directly, they
are probably clever enough to tell us what git commit they are on, so
this is not a big problem right now. It will also get better once
golang/go#37475 is fixed in the future.

Until then, if we need to do "release" builds locally, we can embed an
explicit version into the binary via ldflags:

	$ go build -ldflags=-X=main.version=v1.2.3
	$ ./garble version
	v1.2.3

Fixes #217.
											
										
										
											3 years ago
+											}
-												make "garble version" include VCS information

When someone builds garble from a git clone,
the resulting binary used to not contain any information:

	$ garble version
	(devel)

Since Go 1.18, VCS information is stamped by default into binaries.
We now print it, alongside any other available build settings:

	$ garble version
	mvdan.cc/garble (devel)

	Build settings:
	       -compiler gc
	     CGO_ENABLED 1
	          GOARCH amd64
	            GOOS linux
	         GOAMD64 v3
	             vcs git
	    vcs.revision 91ea246349544769f5100c29f79cb0f173abfeea
	        vcs.time 2022-03-18T13:45:11Z
	    vcs.modified true

Note that it's still possible for a garble build to contain no useful
version information, such as when built via "go build -buildvcs=false".
However, if a user opts into omitting the information, it's on them to
figure out what version of garble they actually built.

While here, bump test-gotip.

Fixes #491.

											
										
										
											2 years ago
+											// The padding helps keep readability by aligning:
 											//
 											//   veryverylong.key value
 											//          short.key some-other-value
 											//
 											// Empirically, 16 is enough; the longest key seen is "vcs.revision".
 											fmt.Printf("%16s %s\n", setting.Key, setting.Value)
-												add a "version" command (#220)

Mimicking "go version", this tells the user garble's own version.

The code is exactly the same that is used for another tool written in
Go, shfmt. It uses runtime/debug to fetch the module version embedded in
binaries built by Go. For example:

	$ go get mvdan.cc/sh/v3/cmd/shfmt@latest
	$ shfmt -version
	v3.2.2
	$ go get mvdan.cc/sh/v3/cmd/shfmt@master
	$ shfmt -version
	v3.3.0-0.dev.0.20210203135509-56c9918c980d

Note that this will not work for a plain "go build" or "go install"
after a "git clone", since in that case the Go tool can't know garble's
own version via go.mod - since it's the current main module:

	$ go build
	$ ./garble version
	(devel)

For the use case of the power user building from source directly, they
are probably clever enough to tell us what git commit they are on, so
this is not a big problem right now. It will also get better once
golang/go#37475 is fixed in the future.

Until then, if we need to do "release" builds locally, we can embed an
explicit version into the binary via ldflags:

	$ go build -ldflags=-X=main.version=v1.2.3
	$ ./garble version
	v1.2.3

Fixes #217.
											
										
										
											3 years ago
+										}
 										return nil
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									case "reverse":
 										return commandReverse(args)
-												support `garble run`

It's the third time I've wanted it to quickly test out standalone
reproducers when people submit bugs.

Fixes #661.

											
										
										
											1 year ago
+									case "build", "test", "run":
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+										cmd, err := toolexecCmd(command, args)
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+										defer func() {
 											if err := os.RemoveAll(os.Getenv("GARBLE_SHARED")); err != nil {
 												fmt.Fprintf(os.Stderr, "could not clean up GARBLE_SHARED: %v\n", err)
 											}
-												internal/linker: place files under GARBLE_CACHE

This means we now have a unified cache directory for garble,
which is now documented in the README.

I considered using the same hash-based cache used for pkgCache,
but decided against it since that cache implementation only stores
regular files without any executable bits set.
We could force the cache package to do what we want here,
but I'm leaning against it for now given that single files work OK.

											
										
										
											1 year ago
+											// skip the trim if we didn't even start a build
 											if sharedCache != nil {
 												fsCache, err := openCache()
 												if err == nil {
 													err = fsCache.Trim()
 												}
 												if err != nil {
 													fmt.Fprintf(os.Stderr, "could not trim GARBLE_CACHE: %v\n", err)
 												}
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+											}
 										}()
-												Share data between processes via a shared file. (#192)

Previously garble heavily used env vars to share data between processes.
This also makes it easy to share complex data between processes.

The complexity of main.go is considerably reduced.
											
										
										
											4 years ago
+										if err != nil {
 											return err
-												complain when GOPRIVATE matches no packages

This is important, because it would mean that we would obfuscate
nothing. At best, it would be confusing; at worst, it could mislead
the user into thinking the binary is obfuscated.

Fixes #20.
Updates #108.

											
										
										
											4 years ago
+										}
-												introduce 'garble build' shortcut

This way, the user doesn't need to remember to use flags like -a and
-trimpath. Also because we might need more 'go build' flags in the
future.

											
										
										
											5 years ago
+										cmd.Stdout = os.Stdout
 										cmd.Stderr = os.Stderr
-												remove a couple of easy TODOs

First, I tried to follow my own past advice to only set GarbleActionID
if ToObfuscate is true. However, that broke at least three parts of
transformCompile, as the hash is used for more than I recalled.
Give up on that idea, because the current code is working as intended.
Better document what GarbleActionID is and what we use it for.

Second, now that https://go.dev/cl/348741 was shipped with Go 1.18,
using the logger when its output is io.Discard is already a no-op.
So we no longer need our debugf wrapper to apply the no-op logic.

											
										
										
											2 years ago
+										log.Printf("calling via toolexec: %s", cmd)
-												split main1 with a func returning an error

											
										
										
											5 years ago
+										return cmd.Run()
-												share a single temporary directory between all processes

Each compile and link sub-process created its own temporary directory,
to be cleaned up shortly after. Moreover, we also had the global
gob-encoded temporary file.

Instead, place all of those under a single, start-to-end temporary
directory. This is cleaner for the end user, and easier to maintain for
us.

A big plus is that we can also get rid of the confusing deferred global,
as it was mostly used to clean up these extra temp dirs. The only
remaining use was post-compile code, which is now an explicit func
returned by each "transform" func.

While at it, clean up the math/rand seeding code a bit and add a debug
log line, and stop shadowing a cmd string with a cmd *exec.Cmd.

Fixes #147.

											
										
										
											3 years ago
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+									case "toolexec":
 										_, tool := filepath.Split(args[0])
 										if runtime.GOOS == "windows" {
 											tool = strings.TrimSuffix(tool, ".exe")
 										}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										transform := transformMethods[tool]
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+										transformed := args[1:]
 										if transform != nil {
 											startTime := time.Now()
 											log.Printf("transforming %s with args: %s", tool, strings.Join(transformed, " "))
-												avoid call to loadSharedCache for toolexec calls we skip

We were doing too much work for tools we don't need to wrap at all,
such as `go tool pack` or `go tool buildid`, which get run about as
often as the compiler does.

The overhead is small per call, but it adds up.

	name      old time/op         new time/op         delta
	Build-16          20.7s ± 0%          20.5s ± 1%    ~     (p=0.057 n=4+4)

	name      old bin-B           new bin-B           delta
	Build-16          5.67M ± 0%          5.66M ± 0%  -0.07%  (p=0.029 n=4+4)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          707ms ± 0%          705ms ± 2%    ~     (p=0.886 n=4+4)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          25.0M ± 0%          24.9M ± 0%  -0.26%  (p=0.029 n=4+4)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          8.32s ± 2%          7.90s ± 3%  -5.05%  (p=0.029 n=4+4)

											
										
										
											1 year ago
 											// We're in a toolexec sub-process, not directly called by the user.
 											// Load the shared data and wrap the tool, like the compiler or linker.
 											if err := loadSharedCache(); err != nil {
 												return err
 											}
 											if len(args) == 2 && args[1] == "-V=full" {
 												return alterToolVersion(tool, args)
 											}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											var tf transformer
-												avoid call to loadSharedCache for toolexec calls we skip

We were doing too much work for tools we don't need to wrap at all,
such as `go tool pack` or `go tool buildid`, which get run about as
often as the compiler does.

The overhead is small per call, but it adds up.

	name      old time/op         new time/op         delta
	Build-16          20.7s ± 0%          20.5s ± 1%    ~     (p=0.057 n=4+4)

	name      old bin-B           new bin-B           delta
	Build-16          5.67M ± 0%          5.66M ± 0%  -0.07%  (p=0.029 n=4+4)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          707ms ± 0%          705ms ± 2%    ~     (p=0.886 n=4+4)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          25.0M ± 0%          24.9M ± 0%  -0.26%  (p=0.029 n=4+4)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          8.32s ± 2%          7.90s ± 3%  -5.05%  (p=0.029 n=4+4)

											
										
										
											1 year ago
+											toolexecImportPath := os.Getenv("TOOLEXEC_IMPORTPATH")
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											tf.curPkg = sharedCache.ListedPackages[toolexecImportPath]
 											if tf.curPkg == nil {
-												avoid call to loadSharedCache for toolexec calls we skip

We were doing too much work for tools we don't need to wrap at all,
such as `go tool pack` or `go tool buildid`, which get run about as
often as the compiler does.

The overhead is small per call, but it adds up.

	name      old time/op         new time/op         delta
	Build-16          20.7s ± 0%          20.5s ± 1%    ~     (p=0.057 n=4+4)

	name      old bin-B           new bin-B           delta
	Build-16          5.67M ± 0%          5.66M ± 0%  -0.07%  (p=0.029 n=4+4)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          707ms ± 0%          705ms ± 2%    ~     (p=0.886 n=4+4)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          25.0M ± 0%          24.9M ± 0%  -0.26%  (p=0.029 n=4+4)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          8.32s ± 2%          7.90s ± 3%  -5.05%  (p=0.029 n=4+4)

											
										
										
											1 year ago
+												return fmt.Errorf("TOOLEXEC_IMPORTPATH not found in listed packages: %s", toolexecImportPath)
 											}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											tf.origImporter = importerForPkg(tf.curPkg)
-												avoid call to loadSharedCache for toolexec calls we skip

We were doing too much work for tools we don't need to wrap at all,
such as `go tool pack` or `go tool buildid`, which get run about as
often as the compiler does.

The overhead is small per call, but it adds up.

	name      old time/op         new time/op         delta
	Build-16          20.7s ± 0%          20.5s ± 1%    ~     (p=0.057 n=4+4)

	name      old bin-B           new bin-B           delta
	Build-16          5.67M ± 0%          5.66M ± 0%  -0.07%  (p=0.029 n=4+4)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          707ms ± 0%          705ms ± 2%    ~     (p=0.886 n=4+4)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          25.0M ± 0%          24.9M ± 0%  -0.26%  (p=0.029 n=4+4)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          8.32s ± 2%          7.90s ± 3%  -5.05%  (p=0.029 n=4+4)

											
										
										
											1 year ago
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+											var err error
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											if transformed, err = transform(&tf, transformed); err != nil {
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+												return err
 											}
 											log.Printf("transformed args for %s in %s: %s", tool, debugSince(startTime), strings.Join(transformed, " "))
 										} else {
 											log.Printf("skipping transform on %s with args: %s", tool, strings.Join(transformed, " "))
 										}
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
 										executablePath := args[0]
 										if tool == "link" {
-												internal/linker: place files under GARBLE_CACHE

This means we now have a unified cache directory for garble,
which is now documented in the README.

I considered using the same hash-based cache used for pkgCache,
but decided against it since that cache implementation only stores
regular files without any executable bits set.
We could force the cache package to do what we want here,
but I'm leaning against it for now given that single files work OK.

											
										
										
											1 year ago
+											modifiedLinkPath, unlock, err := linker.PatchLinker(sharedCache.GoEnv.GOROOT, sharedCache.GoEnv.GOVERSION, sharedCache.CacheDir, sharedTempDir)
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
+											if err != nil {
 												return fmt.Errorf("cannot get modified linker: %v", err)
 											}
 											defer unlock()
 											executablePath = modifiedLinkPath
 											os.Setenv(linker.MagicValueEnv, strconv.FormatUint(uint64(magicValue()), 10))
-												implement funcInfo.entryoff encryption

At linker stage, we now encrypt funcInfo.entryoff value with a simple algorithm (1 xor + 1 mul). 
This makes it harder to relate function metadata (e.g. name) to function itself in binary, almost without affecting performance.
											
										
										
											1 year ago
+											os.Setenv(linker.EntryOffKeyEnv, strconv.FormatUint(uint64(entryOffKey()), 10))
-												remove all unexported func names with -tiny via the linker

The patch to the linker does this when generating the pclntab,
which is the binary section containing func names.
When `-tiny` is being used, we look for unexported funcs,
and set their names to the offset `0` - a shared empty string.

We also avoid including the original name in the binary,
which saves a significant amount of space.
The following stats were collected on GOOS=linux,
which show that `-tiny` is now about 4% smaller:

	go build                  1203067
	garble build               782336
	(old) garble -tiny build   688128
	(new) garble -tiny build   659456
											
										
										
											1 year ago
+											if flagTiny {
 												os.Setenv(linker.TinyEnv, "true")
 											}
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
 											log.Printf("replaced linker with: %s", executablePath)
 										}
 										cmd := exec.Command(executablePath, transformed...)
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+										cmd.Stdout = os.Stdout
 										cmd.Stderr = os.Stderr
 										if err := cmd.Run(); err != nil {
-												split main1 with a func returning an error

											
										
										
											5 years ago
+											return err
-												initial commit

											
										
										
											5 years ago
+										}
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+										return nil
 									default:
 										return fmt.Errorf("unknown command: %q", command)
-												initial commit

											
										
										
											5 years ago
+									}
 								}
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+								func hasHelpFlag(flags []string) bool {
 									for _, f := range flags {
 										switch f {
 										case "-h", "-help", "--help":
 											return true
 										}
 									}
 									return false
 								}
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+								// toolexecCmd builds an *exec.Cmd which is set up for running "go <command>"
 								// with -toolexec=garble and the supplied arguments.
 								//
 								// Note that it uses and modifies global state; in general, it should only be
 								// called once from mainErr in the top-level garble process.
 								func toolexecCmd(command string, args []string) (*exec.Cmd, error) {
 									// Split the flags from the package arguments, since we'll need
 									// to run 'go list' on the same set of packages.
 									flags, args := splitFlagsFromArgs(args)
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+									if hasHelpFlag(flags) {
 										out, _ := exec.Command("go", command, "-h").CombinedOutput()
 										fmt.Fprintf(os.Stderr, `
 								usage: garble [garble flags] %s [arguments]
 								This command wraps "go %s". Below is its help:
 								%s`[1:], command, command, out)
 										return nil, errJustExit(2)
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									}
-												give a useful error for "garble build -tiny"

We've had confused users a handful of times by now.
And it's reasonable to expect flags to be after the command,
as that's how flags work for cmd/go itself.

I don't think we want to mix our flags with Go's,
or start accepting flags in either place.
Both seem like worse solutions long-term, as they can add confusion.

However, we can quickly give a useful hint when a flag is misplaced.
That should get new users unblocked without asking for help.

We use a regular expression for this purpose,
because it doesn't seem like a FlagSet supports what we need;
to detect whether an argument is one of our flags,
without actually applying its value to the flagset.
Our flagset would also error on Go's flags, which we don't want.

											
										
										
											2 years ago
+									for _, flag := range flags {
 										if rxGarbleFlag.MatchString(flag) {
 											return nil, fmt.Errorf("garble flags must precede command, like: garble %s build ./pkg", flag)
 										}
 									}
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
-												start using original action IDs (#251)

When we obfuscate a name, what we do is hash the name with the action ID
of the package that contains the name. To ensure that the hash changes
if the garble tool changes, we used the action ID of the obfuscated
build, which is different than the original action ID, as we include
garble's own content ID in "go tool compile -V=full" via -toolexec.

Let's call that the "obfuscated action ID". Remember that a content ID
is roughly the hash of a binary or object file, and an action ID
contains the hash of a package's source code plus the content IDs of its
dependencies.

This had the advantage that it did what we wanted. However, it had one
massive drawback: when we compile a package, we only have the obfuscated
action IDs of its dependencies. This is because one can't have the
content ID of dependent packages before they are built.

Usually, this is not a problem, because hashing a foreign name means it
comes from a dependency, where we already have the obfuscated action ID.
However, that's not always the case.

First, go:linkname directives can point to any symbol that ends up in
the binary, even if the package is not a dependency. So garble could
only support linkname targets belonging to dependencies. This is at the
root of why we could not obfuscate the runtime; it contains linkname
directives targeting the net package, for example, which depends on runtime.

Second, some other places did not have an easy access to obfuscated
action IDs, like transformAsm, which had to recover it from a temporary
file stored by transformCompile.

Plus, this was all pretty expensive, as each toolexec sub-process had to
make repeated calls to buildidOf with the object files of dependencies.
We even had to use extra calls to "go list" in the case of indirect
dependencies, as their export files do not appear in importcfg files.

All in all, the old method was complex and expensive. A better mechanism
is to use the original action IDs directly, as listed by "go list"
without garble in the picture.

This would mean that the hashing does not change if garble changes,
meaning weaker obfuscation. To regain that property, we define the
"garble action ID", which is just the original action ID hashed together
with garble's own content ID.

This is practically the same as the obfuscated build ID we used before,
but since it doesn't go through "go tool compile -V=full" and the
obfuscated build itself, we can work out *all* the garble action IDs
upfront, before the obfuscated build even starts.

This fixes all of our problems. Now we know all garble build IDs
upfront, so a bunch of hacks can be entirely removed. Plus, since we
know them upfront, we can also cache them and avoid repeated calls to
"go tool buildid".

While at it, make use of the new BuildID field in Go 1.16's "list -json
-export". This avoids the vast majority of "go tool buildid" calls, as
the only ones that remain are 2 on the garble binary itself.

The numbers for Go 1.16 look very good:

	name     old time/op       new time/op       delta
	Build-8        146ms ± 4%        101ms ± 1%  -31.01%  (p=0.002 n=6+6)

	name     old bin-B         new bin-B         delta
	Build-8        6.61M ± 0%        6.60M ± 0%   -0.09%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        321ms ± 7%        202ms ± 6%  -37.11%  (p=0.002 n=6+6)

	name     old user-time/op  new user-time/op  delta
	Build-8        538ms ± 4%        414ms ± 4%  -23.12%  (p=0.002 n=6+6)
											
										
										
											3 years ago
+									// Here is the only place we initialize the cache.
 									// The sub-processes will parse it from a shared gob file.
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									sharedCache = &sharedCacheType{}
-												start using original action IDs (#251)

When we obfuscate a name, what we do is hash the name with the action ID
of the package that contains the name. To ensure that the hash changes
if the garble tool changes, we used the action ID of the obfuscated
build, which is different than the original action ID, as we include
garble's own content ID in "go tool compile -V=full" via -toolexec.

Let's call that the "obfuscated action ID". Remember that a content ID
is roughly the hash of a binary or object file, and an action ID
contains the hash of a package's source code plus the content IDs of its
dependencies.

This had the advantage that it did what we wanted. However, it had one
massive drawback: when we compile a package, we only have the obfuscated
action IDs of its dependencies. This is because one can't have the
content ID of dependent packages before they are built.

Usually, this is not a problem, because hashing a foreign name means it
comes from a dependency, where we already have the obfuscated action ID.
However, that's not always the case.

First, go:linkname directives can point to any symbol that ends up in
the binary, even if the package is not a dependency. So garble could
only support linkname targets belonging to dependencies. This is at the
root of why we could not obfuscate the runtime; it contains linkname
directives targeting the net package, for example, which depends on runtime.

Second, some other places did not have an easy access to obfuscated
action IDs, like transformAsm, which had to recover it from a temporary
file stored by transformCompile.

Plus, this was all pretty expensive, as each toolexec sub-process had to
make repeated calls to buildidOf with the object files of dependencies.
We even had to use extra calls to "go list" in the case of indirect
dependencies, as their export files do not appear in importcfg files.

All in all, the old method was complex and expensive. A better mechanism
is to use the original action IDs directly, as listed by "go list"
without garble in the picture.

This would mean that the hashing does not change if garble changes,
meaning weaker obfuscation. To regain that property, we define the
"garble action ID", which is just the original action ID hashed together
with garble's own content ID.

This is practically the same as the obfuscated build ID we used before,
but since it doesn't go through "go tool compile -V=full" and the
obfuscated build itself, we can work out *all* the garble action IDs
upfront, before the obfuscated build even starts.

This fixes all of our problems. Now we know all garble build IDs
upfront, so a bunch of hacks can be entirely removed. Plus, since we
know them upfront, we can also cache them and avoid repeated calls to
"go tool buildid".

While at it, make use of the new BuildID field in Go 1.16's "list -json
-export". This avoids the vast majority of "go tool buildid" calls, as
the only ones that remain are 2 on the garble binary itself.

The numbers for Go 1.16 look very good:

	name     old time/op       new time/op       delta
	Build-8        146ms ± 4%        101ms ± 1%  -31.01%  (p=0.002 n=6+6)

	name     old bin-B         new bin-B         delta
	Build-8        6.61M ± 0%        6.60M ± 0%   -0.09%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        321ms ± 7%        202ms ± 6%  -37.11%  (p=0.002 n=6+6)

	name     old user-time/op  new user-time/op  delta
	Build-8        538ms ± 4%        414ms ± 4%  -23.12%  (p=0.002 n=6+6)
											
										
										
											3 years ago
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									// Note that we also need to pass build flags to 'go list', such
 									// as -tags.
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									sharedCache.ForwardBuildFlags, _ = filterForwardBuildFlags(flags)
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									if command == "test" {
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+										sharedCache.ForwardBuildFlags = append(sharedCache.ForwardBuildFlags, "-test")
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									}
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+									if err := fetchGoEnv(); err != nil {
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+										return nil, err
 									}
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+									if !goVersionOK() {
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+										return nil, errJustExit(1)
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+									}
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									var err error
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									sharedCache.ExecPath, err = os.Executable()
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									if err != nil {
 										return nil, err
 									}
-												internal/linker: place files under GARBLE_CACHE

This means we now have a unified cache directory for garble,
which is now documented in the README.

I considered using the same hash-based cache used for pkgCache,
but decided against it since that cache implementation only stores
regular files without any executable bits set.
We could force the cache package to do what we want here,
but I'm leaning against it for now given that single files work OK.

											
										
										
											1 year ago
+									// Always an absolute directory; defaults to e.g. "~/.cache/garble".
 									if dir := os.Getenv("GARBLE_CACHE"); dir != "" {
 										sharedCache.CacheDir, err = filepath.Abs(dir)
 										if err != nil {
 											return nil, err
 										}
 									} else {
 										parentDir, err := os.UserCacheDir()
 										if err != nil {
 											return nil, err
 										}
 										sharedCache.CacheDir = filepath.Join(parentDir, "garble")
 									}
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									binaryBuildID, err := buildidOf(sharedCache.ExecPath)
-												ensure the runtime is built in a reproducible way

We went to great lengths to ensure garble builds are reproducible.
This includes how the tool itself works,
as its behavior should be the same given the same inputs.

However, we made one crucial mistake with the runtime package.
It has go:linkname directives pointing at other packages,
and some of those pointed packages aren't its dependencies.

Imagine two scenarios where garble builds the runtime package:

1) We run "garble build runtime". The way we handle linkname directives
   calls listPackage on the target package, to obfuscate the target's
   import path and object name. However, since we only obtained build
   info of runtime and its deps, calls for some linknames such as
   listPackage("sync/atomic") will fail. The linkname directive will
   leave its target untouched.

2) We run "garble build std". Unlike the first scenario, all listPackage
   calls issued by runtime's linkname directives will succeed, so its
   linkname directive targets will be obfuscated.

At best, this can result in inconsistent builds, depending on how the
runtime package was built. At worst, the mismatching object names can
result in errors at link time, if the target packages are actually used.

The modified test reproduces the worst case scenario reliably,
when the fix is reverted:

	> env GOCACHE=${WORK}/gocache-empty
	> garble build -a runtime
	> garble build -o=out_rebuild ./stdimporter
	[stderr]
	# test/main/stdimporter
	JZzQivnl.NtQJu0H3: relocation target JZzQivnl.iioHinYT not defined
	JZzQivnl.NtQJu0H3.func9: relocation target JZzQivnl.yz5z0NaH not defined
	JZzQivnl.(*ypvqhKiQ).String: relocation target JZzQivnl.eVciBQeI not defined
	JZzQivnl.(*ypvqhKiQ).PkgPath: relocation target JZzQivnl.eVciBQeI not defined
	[...]

The fix consists of two steps. First, if we're building the runtime and
listPackage fails on a package, that means we ran into scenario 1 above.
To avoid the inconsistency, we fill ListedPackages with "go list [...] std".
This means we'll always build runtime as described in scenario 2 above.

Second, when building packages other than the runtime,
we only allow listPackage to succeed if we're listing a dependency of
the current package.
This ensures we won't run into similar reproducibility bugs in the future.

Finally, re-enable test-gotip on CI since this was the last test flake.

											
										
										
											2 years ago
+									if err != nil {
 										return nil, err
 									}
-												tidy our build ID hash code a bit

First, rename "component" to "hash", since it's shorter and more useful.
A full build ID is two or four hashes joined with slashes.

Second, add sanity checks that buildIDHashLength is being followed.
Otherwise the use of []byte could lead to human error.

Third, move all the hash encoding and decoding logic together.

											
										
										
											1 year ago
+									sharedCache.BinaryContentID = decodeBuildIDHash(splitContentID(binaryBuildID))
-												ensure the runtime is built in a reproducible way

We went to great lengths to ensure garble builds are reproducible.
This includes how the tool itself works,
as its behavior should be the same given the same inputs.

However, we made one crucial mistake with the runtime package.
It has go:linkname directives pointing at other packages,
and some of those pointed packages aren't its dependencies.

Imagine two scenarios where garble builds the runtime package:

1) We run "garble build runtime". The way we handle linkname directives
   calls listPackage on the target package, to obfuscate the target's
   import path and object name. However, since we only obtained build
   info of runtime and its deps, calls for some linknames such as
   listPackage("sync/atomic") will fail. The linkname directive will
   leave its target untouched.

2) We run "garble build std". Unlike the first scenario, all listPackage
   calls issued by runtime's linkname directives will succeed, so its
   linkname directive targets will be obfuscated.

At best, this can result in inconsistent builds, depending on how the
runtime package was built. At worst, the mismatching object names can
result in errors at link time, if the target packages are actually used.

The modified test reproduces the worst case scenario reliably,
when the fix is reverted:

	> env GOCACHE=${WORK}/gocache-empty
	> garble build -a runtime
	> garble build -o=out_rebuild ./stdimporter
	[stderr]
	# test/main/stdimporter
	JZzQivnl.NtQJu0H3: relocation target JZzQivnl.iioHinYT not defined
	JZzQivnl.NtQJu0H3.func9: relocation target JZzQivnl.yz5z0NaH not defined
	JZzQivnl.(*ypvqhKiQ).String: relocation target JZzQivnl.eVciBQeI not defined
	JZzQivnl.(*ypvqhKiQ).PkgPath: relocation target JZzQivnl.eVciBQeI not defined
	[...]

The fix consists of two steps. First, if we're building the runtime and
listPackage fails on a package, that means we ran into scenario 1 above.
To avoid the inconsistency, we fill ListedPackages with "go list [...] std".
This means we'll always build runtime as described in scenario 2 above.

Second, when building packages other than the runtime,
we only allow listPackage to succeed if we're listing a dependency of
the current package.
This ensures we won't run into similar reproducibility bugs in the future.

Finally, re-enable test-gotip on CI since this was the last test flake.

											
										
										
											2 years ago
-												slightly simplify how we deal with linknamed runtime deps

Obfuscating the runtime only needs to list the linknamed packages,
and doesn't need to know about their dependencies directly.

Refactor the script to return a "flat" list that includes all packages
we need, except those that we know the runtime already pulled in.

This allows us to simplify the script and avoid passing -deps to cmd/go.
Performance is unaffected, but I reckon it's worthwhile given how much
we simplified the script.

Longer term, it's also best to avoid using -deps when we don't need it,
as cmd/go could avoid computing information we don't need.

	name              old time/op       new time/op       delta
	Build/NoCache-16        1.68s ± 1%        1.68s ± 0%    ~     (p=1.000 n=5+5)

	name              old bin-B         new bin-B         delta
	Build/NoCache-16        6.72M ± 0%        6.72M ± 0%  +0.01%  (p=0.008 n=5+5)

	name              old sys-time/op   new sys-time/op   delta
	Build/NoCache-16        1.88s ± 1%        1.89s ± 2%    ~     (p=0.548 n=5+5)

	name              old user-time/op  new user-time/op  delta
	Build/NoCache-16        19.9s ± 1%        19.8s ± 0%    ~     (p=0.421 n=5+5)

											
										
										
											2 years ago
+									if err := appendListedPackages(args, true); err != nil {
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+										return nil, err
 									}
-												start using original action IDs (#251)

When we obfuscate a name, what we do is hash the name with the action ID
of the package that contains the name. To ensure that the hash changes
if the garble tool changes, we used the action ID of the obfuscated
build, which is different than the original action ID, as we include
garble's own content ID in "go tool compile -V=full" via -toolexec.

Let's call that the "obfuscated action ID". Remember that a content ID
is roughly the hash of a binary or object file, and an action ID
contains the hash of a package's source code plus the content IDs of its
dependencies.

This had the advantage that it did what we wanted. However, it had one
massive drawback: when we compile a package, we only have the obfuscated
action IDs of its dependencies. This is because one can't have the
content ID of dependent packages before they are built.

Usually, this is not a problem, because hashing a foreign name means it
comes from a dependency, where we already have the obfuscated action ID.
However, that's not always the case.

First, go:linkname directives can point to any symbol that ends up in
the binary, even if the package is not a dependency. So garble could
only support linkname targets belonging to dependencies. This is at the
root of why we could not obfuscate the runtime; it contains linkname
directives targeting the net package, for example, which depends on runtime.

Second, some other places did not have an easy access to obfuscated
action IDs, like transformAsm, which had to recover it from a temporary
file stored by transformCompile.

Plus, this was all pretty expensive, as each toolexec sub-process had to
make repeated calls to buildidOf with the object files of dependencies.
We even had to use extra calls to "go list" in the case of indirect
dependencies, as their export files do not appear in importcfg files.

All in all, the old method was complex and expensive. A better mechanism
is to use the original action IDs directly, as listed by "go list"
without garble in the picture.

This would mean that the hashing does not change if garble changes,
meaning weaker obfuscation. To regain that property, we define the
"garble action ID", which is just the original action ID hashed together
with garble's own content ID.

This is practically the same as the obfuscated build ID we used before,
but since it doesn't go through "go tool compile -V=full" and the
obfuscated build itself, we can work out *all* the garble action IDs
upfront, before the obfuscated build even starts.

This fixes all of our problems. Now we know all garble build IDs
upfront, so a bunch of hacks can be entirely removed. Plus, since we
know them upfront, we can also cache them and avoid repeated calls to
"go tool buildid".

While at it, make use of the new BuildID field in Go 1.16's "list -json
-export". This avoids the vast majority of "go tool buildid" calls, as
the only ones that remain are 2 on the garble binary itself.

The numbers for Go 1.16 look very good:

	name     old time/op       new time/op       delta
	Build-8        146ms ± 4%        101ms ± 1%  -31.01%  (p=0.002 n=6+6)

	name     old bin-B         new bin-B         delta
	Build-8        6.61M ± 0%        6.60M ± 0%   -0.09%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        321ms ± 7%        202ms ± 6%  -37.11%  (p=0.002 n=6+6)

	name     old user-time/op  new user-time/op  delta
	Build-8        538ms ± 4%        414ms ± 4%  -23.12%  (p=0.002 n=6+6)
											
										
										
											3 years ago
+									sharedTempDir, err = saveSharedCache()
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									if err != nil {
 										return nil, err
 									}
 									os.Setenv("GARBLE_SHARED", sharedTempDir)
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+									wd, err := os.Getwd()
 									if err != nil {
 										return nil, err
 									}
 									os.Setenv("GARBLE_PARENT_WORK", wd)
 									if flagDebugDir != "" {
 										if !filepath.IsAbs(flagDebugDir) {
 											flagDebugDir = filepath.Join(wd, flagDebugDir)
 										}
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
+										if err := os.RemoveAll(flagDebugDir); err != nil {
 											return nil, fmt.Errorf("could not empty debugdir: %v", err)
 										}
 										if err := os.MkdirAll(flagDebugDir, 0o755); err != nil {
 											return nil, err
-												unify the definition and storage of flag values

The parent garble process parses the original flags,
as provided by the user via the command line.
Previously, those got stored in the shared cache file,
so that child processes spawned by toolexec could see them.

Unfortunately, this made the code relatively easy to misuse.
A child process would always see flagLiterals as zero value,
given that it should never see such a flag argument directly.
Similarly, one would have to be careful with cached options,
as they could only be consumed after the cache file is loaded.

Simplify the situation by deduplicating the storage of flags.
Now, the parent passes all flags onto children via toolexec.

One exception is GarbleDir, which now becomes an env var.
This seems in line with other top-level dirs like GARBLE_SHARED.

Finally, we turn -seed into a flag.Value,
which lets us implement its "set" behavior as part of flag.Parse.

Overall, we barely reduce the amount of code involved,
but we certainly remove a couple of footguns.
As part of the cleanup, we also introduce appendFlags.

											
										
										
											3 years ago
+										}
 									}
-												use fewer build flags when building std or cmd

When we use `go list` on the standard library, we need to be careful
about what flags are passed from the top-level build command,
because some flags are not going to be appropriate.
In particular, GOFLAGS=-modfile=... resulted in a failure,
reproduced via the GOFLAGS variable added to linker.txtar:

	go: inconsistent vendoring in /home/mvdan/tip/src:
		golang.org/x/crypto@v0.5.1-0.20230203195927-310bfa40f1e4: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod
		golang.org/x/net@v0.7.0: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod
		golang.org/x/sys@v0.5.1-0.20230208141308-4fee21c92339: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod
		golang.org/x/text@v0.7.1-0.20230207171107-30dadde3188b: is marked as explicit in vendor/modules.txt, but not explicitly required in go.mod

		To ignore the vendor directory, use -mod=readonly or -mod=mod.
		To sync the vendor directory, run:
			go mod vendor

To work around this problem, reset the -mod and -modfile flags when
calling "go list" on the standard library, as those are the only two
flags which alter how we load the main module in a build.

The code which builds a modified cmd/link has a similar problem;
it already reset GOOS and GOARCH, but it could similarly run into
problems if other env vars like GOFLAGS were set.
To be on the safe side, we also disable GOENV and GOEXPERIMENT,
which we borrow from Go's bootstrapping commands.

											
										
										
											1 year ago
+									goArgs := append([]string{command}, garbleBuildFlags...)
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
 									// Pass the garble flags down to each toolexec invocation.
 									// This way, all garble processes see the same flag values.
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+									// Note that we can end up with a single argument to `go` in the form of:
 									//
 									//	-toolexec='/binary dir/garble' -tiny toolexec
 									//
 									// We quote the absolute path to garble if it contains spaces.
 									// We can add extra flags to the end of the same -toolexec argument.
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
+									var toolexecFlag strings.Builder
 									toolexecFlag.WriteString("-toolexec=")
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									quotedExecPath, err := cmdgoQuotedJoin([]string{sharedCache.ExecPath})
-												properly quote the path to garble in -toolexec

If we don't quote it, paths containing spaces or quote characters will
fail. For instance, the added test without the fix fails:

        > env NAME='with spaces'
        > mkdir $NAME
        > cp $EXEC_PATH $NAME/garble$exe
        > exec $NAME/garble$exe build main.go
        [stderr]
        go tool compile: fork/exec $WORK/with: no such file or directory
        exit status 1

Luckily, the fix is easy: we bundle Go's cmd/internal/quoted package,
which implements a QuotedJoin API for this very purpose.

Fixes #544.

											
										
										
											2 years ago
+									if err != nil {
 										// Can only happen if the absolute path to the garble binary contains
 										// both single and double quotes. Seems extremely unlikely.
 										return nil, err
 									}
 									toolexecFlag.WriteString(quotedExecPath)
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
+									appendFlags(&toolexecFlag, false)
-												use -toolexec="garble toolexec"

This way, the child process knows that it's running a toolchain command
via -toolexec without having to guess via filepath.IsAbs.

While here, improve the docs and tests a bit.

											
										
										
											2 years ago
+									toolexecFlag.WriteString(" toolexec")
-												update cmd/go flags for 1.18

As of 1.18beta1. I used bash commands like:

	diff -u <(go1.17.5 help build) <(gotip help build)

While here, supply -buildinfo=false and -buildvcs=false to go build.
We already remove that information by discarding the _gomod_.go file,
but we might as well pass the flags too.
If anything, it lets the toolchain avoid the work entirely.
Note that we can't use these flags on Go 1.17 for now, though.

Add a TODO that came to mind while writing this, too.

											
										
										
											2 years ago
+									goArgs = append(goArgs, toolexecFlag.String())
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									if flagControlFlow {
 										goArgs = append(goArgs, "-debug-actiongraph", filepath.Join(sharedTempDir, actionGraphFileName))
 									}
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									if flagDebugDir != "" {
 										// In case the user deletes the debug directory,
 										// and a previous build is cached,
 										// rebuild all packages to re-fill the debug dir.
 										goArgs = append(goArgs, "-a")
 									}
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+									if command == "test" {
-												use "obfuscate" instead of "garble" in some more places

Mainly comments. "garble" refers to the tool, but the verb and adjective
is more intuitive as "obfuscate" and "obfuscated" instead of "garble"
and "garbled".

											
										
										
											3 years ago
+										// vet is generally not useful on obfuscated code; keep it
-												initial support for reversing panic output (#225)

For now, this only implements reversing of exported names which are
hashed with action IDs. Many other kinds of obfuscation, like positions
and private names, are not yet implemented.

Note that we don't document this new command yet on purpose, since it's
not finished.

Some other minor cleanups were done for future changes, such as making
transformLineInfo into a method that also receives the original
filename, and making header names more self-describing.

Updates #5.
											
										
										
											3 years ago
+										// disabled by default.
 										goArgs = append(goArgs, "-vet=off")
 									}
 									goArgs = append(goArgs, flags...)
 									goArgs = append(goArgs, args...)
 									return exec.Command("go", goArgs...), nil
 								}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								var transformMethods = map[string]func(*transformer, []string) ([]string, error){
 									"asm":     (*transformer).transformAsm,
 									"compile": (*transformer).transformCompile,
 									"link":    (*transformer).transformLink,
-												start enforcing the link flags -w -s

											
										
										
											5 years ago
+								}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func (tf *transformer) transformAsm(args []string) ([]string, error) {
-												replace the -p path when assembling (#247)

The asm tool runs twice for a package with assembly. The second time it
does, the path given to the -p flag matters, just like in the compiler,
as we generate an object file.

We don't have a -buildid flag in the asm tool, so obtaining the action
ID to obfuscate the package path with is a bit tricky. We store it from
transformCompile, and read it from transformAsm. See the detailed docs
for more.

This was the last "skip" line in the tests due to Go 1.16. After all PRs
are merged, one last PR documenting that 1.16 is supported will be sent,
closing the issue for good.

It's unclear why this wasn't an issue in Go 1.15. My best guess is that
the ABI changes only happened in Go 1.16, and this causes exported asm
funcs to start showing up in object files with their package paths.

Updates #124.
											
										
										
											3 years ago
+									flags, paths := splitFlagsFromFiles(args, ".s")
-												concentrate and simplify "to obfuscate" logic

Back in the day, we used to call toObfuscate anytime we needed to know
whether a package should be obfuscated.
More recently, we started computing via the ToObfuscate field,
which then gets shared with all sub-processes via sharedCache.

We still had two places that directly called toObfuscate.
Replace those with ToObfuscate, and inline toObfuscate into shared.go.

obfuscatedImportPath is also a potential footgun for main packages.
Some use cases always want the original "main" package name,
such as for use in the compiler's "-p main" flag,
while other cases want the obfuscated package import path,
such as the entries in importcfg files.

Since each of these call sites handles the edge case well,
obfuscatedImportPath now panics on main packages to avoid any misuse.

Finally, test that we never leak main package paths via ldflags.txt.
We never did, but it's good to make sure.

Overall, this avoids confusion and trims the size of main.go a bit.

											
										
										
											2 years ago
+									// When assembling, the import path can make its way into the output object file.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									if tf.curPkg.Name != "main" && tf.curPkg.ToObfuscate {
 										flags = flagSetValue(flags, "-p", tf.curPkg.obfuscatedImportPath())
-												replace the -p path when assembling (#247)

The asm tool runs twice for a package with assembly. The second time it
does, the path given to the -p flag matters, just like in the compiler,
as we generate an object file.

We don't have a -buildid flag in the asm tool, so obtaining the action
ID to obfuscate the package path with is a bit tricky. We store it from
transformCompile, and read it from transformAsm. See the detailed docs
for more.

This was the last "skip" line in the tests due to Go 1.16. After all PRs
are merged, one last PR documenting that 1.16 is supported will be sent,
closing the issue for good.

It's unclear why this wasn't an issue in Go 1.15. My best guess is that
the ABI changes only happened in Go 1.16, and this causes exported asm
funcs to start showing up in object files with their package paths.

Updates #124.
											
										
										
											3 years ago
+									}
-												remove unused code spotted by -coverprofile

Remove some asthelper APIs that haven't been used for some time.
They can be recovered from the git history if needed again.

One type assertion in the literals package is always true.

Embedded field objects are handled near the top of transformGo, so the
extra !obj.Embedded() check was always true. Remove it.

We always obfuscate standalone funcs now, so the obfuscatedTypesPackage
check is no longer necessary. This was necessary when we used to not
obfuscate func names when they were used in linkname directives.

The workaround for test package imports in obfuscatedTypesPackage I had
to add a few commits ago no longer seems to be necessary. This might be
thanks to the simplification with functions in the paragraph just above.

It's impossible to run garble without -trimpath nowadays, as we error
before the build even starts:

	$ go build -toolexec=garble
	go tool compile: exit status 1
	cannot open shared file, this is most likely due to not running "garble [command]"

When run as "garble build", the trimpath flag is always set. So the
check in alterTrimpath never triggers anymore, and couldn't be tested.

Finally, simplify the handling of comment syntax in printFile, and add a
few TODOs for other code paths not covered by our existing tests.

Total code coverage is up from 90.3% to 91.0%.

											
										
										
											3 years ago
+									flags = alterTrimpath(flags)
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
-												obfuscate all assembly filenames

We were still leaking the filenames for assembly files.
In our existing asm.txtar test's output binary,
the string `test/main/garble_main_amd64.s` was present.
This leaked full import paths on one hand,
and the filenames of each assembly file on the other.

We avoid this in Go files by using `/*line` directives,
but those are not supported in assembly files.
Instead, obfuscate the paths in the temporary directory.
Note that we still need a separate temporary directory per package,
because otherwise any included header files might collide.

We must remove the `main` package panic in obfuscatedImportPath,
as we now need to use that function for all packages.

While here, remove the outdated comment about `-trimpath`.

Fixes #605.

											
										
										
											2 years ago
+									// The assembler runs twice; the first with -gensymabis,
 									// where we continue below and we obfuscate all the source.
 									// The second time, without -gensymabis, we reconstruct the paths to the
 									// obfuscated source files and reuse them to avoid work.
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+									newPaths := make([]string, 0, len(paths))
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+									if !slices.Contains(args, "-gensymabis") {
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+										for _, path := range paths {
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											name := hashWithPackage(tf.curPkg, filepath.Base(path)) + ".s"
 											pkgDir := filepath.Join(sharedTempDir, tf.curPkg.obfuscatedImportPath())
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+											newPath := filepath.Join(pkgDir, name)
 											newPaths = append(newPaths, newPath)
 										}
 										return append(flags, newPaths...), nil
 									}
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+									const missingHeader = "missing header path"
 									newHeaderPaths := make(map[string]string)
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+									var buf, includeBuf bytes.Buffer
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
+									for _, path := range paths {
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+										buf.Reset()
 										f, err := os.Open(path)
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
+										if err != nil {
 											return nil, err
 										}
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+										defer f.Close() // in case of error
 										scanner := bufio.NewScanner(f)
 										for scanner.Scan() {
 											line := scanner.Text()
-												support inline comments in asm #include lines

That is, the assembly line

    #include "foo.h" // bar

would make garble run into an error, as we would try to parse
the #include directive before we stripped comments,
effectively trying to unquote the string

    "foo.h" // bar

rather than just the included filename

    "foo.h"

Add test cases for asm but also cgo, while at it.

Fixes #812.

											
										
										
											5 months ago
+											// Whole-line comments might be directives, leave them in place.
 											// For example: //go:build race
 											// Any other comment, including inline ones, can be discarded entirely.
 											line, comment, hasComment := strings.Cut(line, "//")
 											if hasComment && line == "" {
 												buf.WriteString("//")
 												buf.WriteString(comment)
 												buf.WriteByte('\n')
 												continue
 											}
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
-												support inline comments in asm #include lines

That is, the assembly line

    #include "foo.h" // bar

would make garble run into an error, as we would try to parse
the #include directive before we stripped comments,
effectively trying to unquote the string

    "foo.h" // bar

rather than just the included filename

    "foo.h"

Add test cases for asm but also cgo, while at it.

Fixes #812.

											
										
										
											5 months ago
+											// Preprocessor lines to include another file.
 											// For example: #include "foo.h"
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+											if quoted := strings.TrimPrefix(line, "#include"); quoted != line {
 												quoted = strings.TrimSpace(quoted)
 												path, err := strconv.Unquote(quoted)
-												support inline comments in asm #include lines

That is, the assembly line

    #include "foo.h" // bar

would make garble run into an error, as we would try to parse
the #include directive before we stripped comments,
effectively trying to unquote the string

    "foo.h" // bar

rather than just the included filename

    "foo.h"

Add test cases for asm but also cgo, while at it.

Fixes #812.

											
										
										
											5 months ago
+												if err != nil { // note that strconv.Unquote errors do not include the input string
 													return nil, fmt.Errorf("cannot unquote %q: %v", quoted, err)
-												fix windows/arm cross-build linking

Obfuscating some std packages for windows/arm triggered a bug; when
encountering a call to runtime·memmove, we'd hash "memmove" with the
current package's action ID.

This is wrong on two levels: First, we aren't obfuscating the runtime
package yet. And second, if we did, we would have to hash the symbol
appropriately, with that package's action ID.

For now, only hashing the local names does the trick. That's all that
the code currently supports, anyway.

											
										
										
											3 years ago
+												}
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+												newPath := newHeaderPaths[path]
 												switch newPath {
 												case missingHeader: // no need to try again
 													buf.WriteString(line)
 													buf.WriteByte('\n')
 													continue
 												case "": // first time we see this header
 													includeBuf.Reset()
 													content, err := os.ReadFile(path)
 													if errors.Is(err, fs.ErrNotExist) {
 														newHeaderPaths[path] = missingHeader
 														buf.WriteString(line)
 														buf.WriteByte('\n')
 														continue // a header file provided by Go or the system
 													} else if err != nil {
 														return nil, err
 													}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+													tf.replaceAsmNames(&includeBuf, content)
-												fix windows/arm cross-build linking

Obfuscating some std packages for windows/arm triggered a bug; when
encountering a call to runtime·memmove, we'd hash "memmove" with the
current package's action ID.

This is wrong on two levels: First, we aren't obfuscating the runtime
package yet. And second, if we did, we would have to hash the symbol
appropriately, with that package's action ID.

For now, only hashing the local names does the trick. That's all that
the code currently supports, anyway.

											
										
										
											3 years ago
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+													// For now, we replace `foo.h` or `dir/foo.h` with `garbled_foo.h`.
 													// The different name ensures we don't use the unobfuscated file.
 													// This is far from perfect, but does the job for the time being.
 													// In the future, use a randomized name.
 													basename := filepath.Base(path)
 													newPath = "garbled_" + basename
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+													if _, err := tf.writeSourceFile(basename, newPath, includeBuf.Bytes()); err != nil {
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+														return nil, err
 													}
 													newHeaderPaths[path] = newPath
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+												}
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+												buf.WriteString("#include ")
 												buf.WriteString(strconv.Quote(newPath))
 												buf.WriteByte('\n')
 												continue
 											}
 											// Anything else is regular assembly; replace the names.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											tf.replaceAsmNames(&buf, []byte(line))
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+											buf.WriteByte('\n')
 										}
 										if err := scanner.Err(); err != nil {
 											return nil, err
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
+										}
-												obfuscate all assembly filenames

We were still leaking the filenames for assembly files.
In our existing asm.txtar test's output binary,
the string `test/main/garble_main_amd64.s` was present.
This leaked full import paths on one hand,
and the filenames of each assembly file on the other.

We avoid this in Go files by using `/*line` directives,
but those are not supported in assembly files.
Instead, obfuscate the paths in the temporary directory.
Note that we still need a separate temporary directory per package,
because otherwise any included header files might collide.

We must remove the `main` package panic in obfuscatedImportPath,
as we now need to use that function for all packages.

While here, remove the outdated comment about `-trimpath`.

Fixes #605.

											
										
										
											2 years ago
+										// With assembly files, we obfuscate the filename in the temporary
 										// directory, as assembly files do not support `/*line` directives.
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+										// TODO(mvdan): per cmd/asm/internal/lex, they do support `#line`.
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+										basename := filepath.Base(path)
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										newName := hashWithPackage(tf.curPkg, basename) + ".s"
 										if path, err := tf.writeSourceFile(basename, newName, buf.Bytes()); err != nil {
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
+											return nil, err
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+										} else {
 											newPaths = append(newPaths, path)
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
+										}
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+										f.Close() // do not keep len(paths) files open
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+									}
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+									return append(flags, newPaths...), nil
 								}
-												obfuscate asm function names as well (#273)

Historically, it was impossible to rename those funcs as the
implementation was in assembly files, and we only transformed Go code.

Now that transformAsm exists, it's only about fifty lines to do some
very basic parsing and rewriting of assembly files.

This fixes the obfuscated builds of multiple std packages, including a
few dependencies of net/http, since they included assembly funcs which
called pure Go functions. Those pure Go functions had their names
obfuscated, breaking the call sites in assembly.

Fixes #258.
Fixes #261.
											
										
										
											3 years ago
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func (tf *transformer) replaceAsmNames(buf *bytes.Buffer, remaining []byte) {
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+									// We need to replace all function references with their obfuscated name
 									// counterparts.
 									// Luckily, all func names in Go assembly files are immediately followed
 									// by the unicode "middle dot", like:
 									//
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+									//	TEXT ·privateAdd(SB),$0-24
 									//	TEXT runtime∕internal∕sys·Ctz64(SB), NOSPLIT, $0-12
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+									//
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+									// Note that import paths in assembly, like `runtime∕internal∕sys` above,
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+									// use Unicode periods and slashes rather than the ASCII ones used by `go list`.
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+									// We need to convert to ASCII to find the right package information.
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+									const (
 										asmPeriod = '·'
 										goPeriod  = '.'
 										asmSlash  = '∕'
 										goSlash   = '/'
 									)
 									asmPeriodLen := utf8.RuneLen(asmPeriod)
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+									for {
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+										periodIdx := bytes.IndexRune(remaining, asmPeriod)
 										if periodIdx < 0 {
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+											buf.Write(remaining)
 											remaining = nil
 											break
 										}
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+										// The package name ends at the first rune which cannot be part of a Go
 										// import path, such as a comma or space.
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+										pkgStart := periodIdx
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+										for pkgStart >= 0 {
 											c, size := utf8.DecodeLastRune(remaining[:pkgStart])
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+											if !unicode.IsLetter(c) && c != '_' && c != asmSlash && !unicode.IsDigit(c) {
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+												break
 											}
 											pkgStart -= size
 										}
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+										// The package name might actually be longer, e.g:
 										//
 										//	JMP test∕with·many·dots∕main∕imported·PublicAdd(SB)
 										//
 										// We have `test∕with` so far; grab `·many·dots∕main∕imported` as well.
 										pkgEnd := periodIdx
 										lastAsmPeriod := -1
 										for i := pkgEnd + asmPeriodLen; i <= len(remaining); {
 											c, size := utf8.DecodeRune(remaining[i:])
 											if c == asmPeriod {
 												lastAsmPeriod = i
 											} else if !unicode.IsLetter(c) && c != '_' && c != asmSlash && !unicode.IsDigit(c) {
 												if lastAsmPeriod > 0 {
 													pkgEnd = lastAsmPeriod
 												}
 												break
 											}
 											i += size
 										}
 										asmPkgPath := string(remaining[pkgStart:pkgEnd])
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
 										// Write the bytes before our unqualified `·foo` or qualified `pkg·foo`.
 										buf.Write(remaining[:pkgStart])
 										// If the name was qualified, fetch the package, and write the
 										// obfuscated import path if needed.
-												support assembly references to package names

Go's package runtime/internal/atomic contains references to functions
which refer to the current package by its package name alone:

	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)

We could only handle unqualified or fully qualified references, like:

	JMP ·Load64(SB)
	JMP runtime∕internal∕atomic·Load64(SB)

Apparently, all three forms are equally valid.
Add a test case and fix it.

I checked whether referencing an imported package by its name worked;
it does not seem to be the case.
This feature appears to be restricted to the current package alone.

While here, we only need goPkgPath when we need to call listPackage.

Fixes #619.

											
										
										
											1 year ago
+										// Note that we don't obfuscate the package path "main".
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg := tf.curPkg
-												drop bits of code to support Go 1.19

											
										
										
											1 year ago
+										if asmPkgPath != "" && asmPkgPath != "main" {
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											if asmPkgPath != tf.curPkg.Name {
-												support assembly references to package names

Go's package runtime/internal/atomic contains references to functions
which refer to the current package by its package name alone:

	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)

We could only handle unqualified or fully qualified references, like:

	JMP ·Load64(SB)
	JMP runtime∕internal∕atomic·Load64(SB)

Apparently, all three forms are equally valid.
Add a test case and fix it.

I checked whether referencing an imported package by its name worked;
it does not seem to be the case.
This feature appears to be restricted to the current package alone.

While here, we only need goPkgPath when we need to call listPackage.

Fixes #619.

											
										
										
											1 year ago
+												goPkgPath := asmPkgPath
 												goPkgPath = strings.ReplaceAll(goPkgPath, string(asmPeriod), string(goPeriod))
 												goPkgPath = strings.ReplaceAll(goPkgPath, string(asmSlash), string(goSlash))
 												var err error
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+												lpkg, err = listPackage(tf.curPkg, goPkgPath)
-												support assembly references to package names

Go's package runtime/internal/atomic contains references to functions
which refer to the current package by its package name alone:

	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)

We could only handle unqualified or fully qualified references, like:

	JMP ·Load64(SB)
	JMP runtime∕internal∕atomic·Load64(SB)

Apparently, all three forms are equally valid.
Add a test case and fix it.

I checked whether referencing an imported package by its name worked;
it does not seem to be the case.
This feature appears to be restricted to the current package alone.

While here, we only need goPkgPath when we need to call listPackage.

Fixes #619.

											
										
										
											1 year ago
+												if err != nil {
 													panic(err) // shouldn't happen
 												}
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+											}
 											if lpkg.ToObfuscate {
-												support assembly references to package names

Go's package runtime/internal/atomic contains references to functions
which refer to the current package by its package name alone:

	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_loong64.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)
	src/runtime/internal/atomic/atomic_mips64x.s:   JMP atomic·Load64(SB)

We could only handle unqualified or fully qualified references, like:

	JMP ·Load64(SB)
	JMP runtime∕internal∕atomic·Load64(SB)

Apparently, all three forms are equally valid.
Add a test case and fix it.

I checked whether referencing an imported package by its name worked;
it does not seem to be the case.
This feature appears to be restricted to the current package alone.

While here, we only need goPkgPath when we need to call listPackage.

Fixes #619.

											
										
										
											1 year ago
+												// Note that we don't need to worry about asmSlash here,
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+												// because our obfuscated import paths contain no slashes right now.
 												buf.WriteString(lpkg.obfuscatedImportPath())
 											} else {
 												buf.WriteString(asmPkgPath)
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+											}
 										}
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+										// Write the middle dot and advance the remaining slice.
-												support referencing package paths with periods in assembly

The test case is lifted from github.com/bytedance/sonic/internal/native,
which had the following line:

	JMP github·com∕bytedance∕sonic∕internal∕native∕avx2·__quote(SB)

Updates #621.

											
										
										
											1 year ago
+										buf.WriteRune(asmPeriod)
 										remaining = remaining[pkgEnd+asmPeriodLen:]
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+										// The declared name ends at the first rune which cannot be part of a Go
 										// identifier, such as a comma or space.
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+										nameEnd := 0
 										for nameEnd < len(remaining) {
 											c, size := utf8.DecodeRune(remaining[nameEnd:])
 											if !unicode.IsLetter(c) && c != '_' && !unicode.IsDigit(c) {
 												break
 											}
 											nameEnd += size
 										}
 										name := string(remaining[:nameEnd])
 										remaining = remaining[nameEnd:]
-												avoid breaking intrinsics when obfuscating names

We obfuscate import paths as well as their declared names.
The compiler treats some packages and APIs in special ways,
and the way it detects those is by looking at import paths and names.

In the past, we have avoided obfuscating some names like embed.FS or
reflect.Value.MethodByName for this reason. Otherwise,
go:embed or the linker's deadcode elimination might be broken.

This matching by path and name also happens with compiler intrinsics.
Intrinsics allow the compiler to rewrite some standard library calls
with small and efficient assembly, depending on the target GOARCH.
For example, math/bits.TrailingZeros32 gets replaced with ssa.OpCtz32,
which on amd64 may result in using the TZCNTL instruction.

We never noticed that we were breaking many of these intrinsics.
The intrinsics for funcs declared in the runtime and its dependencies
still worked properly, as we do not obfuscate those packages yet.
However, for other packages like math/bits and sync/atomic,
the intrinsics were being entirely disabled due to obfuscated names.

Skipping intrinsics is particularly bad for performance,
and it also leads to slightly larger binaries:

			 │      old      │                 new                 │
			 │     bin-B     │     bin-B      vs base              │
	Build-16   5.450Mi ± ∞ ¹   5.333Mi ± ∞ ¹  -2.15% (p=0.029 n=4)

Finally, the main reason we noticed that intrinsics were broken
is that apparently GOARCH=mips fails to link without them,
as some symbols end up being not defined at all.
This patch fixes builds for the MIPS family of architectures.

Rather than building and linking all of std for every GOARCH,
test that intrinsics work by asking the compiler to print which
intrinsics are being applied, and checking that math/bits gets them.

This fix is relatively unfortunate, as it means we stop obfuscating
about 120 function names and a handful of package paths.
However, fixing builds and intrinsics is much more important.
We can figure out better ways to deal with intrinsics in the future.

Fixes #646.

											
										
										
											1 year ago
+										if lpkg.ToObfuscate && !compilerIntrinsicsFuncs[lpkg.ImportPath+"."+name] {
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+											newName := hashWithPackage(lpkg, name)
 											if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+												log.Printf("asm name %q hashed with %x to %q", name, tf.curPkg.GarbleActionID, newName)
-												support obfuscating the time package

This failed at link time because transformAsm did not know how to handle
the fact that the runtime package's assembly code implements the
`time.now` function via:

	TEXT time·now<ABIInternal>(SB),NOSPLIT,$16-24

First, we need transformAsm to happen for all packages, not just the
ones that we are obfuscating. This is because the runtime can implement
APIs in other packages which are themselves obfuscated, whereas runtime
may not itself be getting obfuscated. This is currently the case with
`GOGARBLE=*` as we do not yet support obfuscating the runtime.

Second, we need to teach replaceAsmNames to handle qualified names with
import paths. Not just to look up the right package information for the
name, but also to obfuscate the package path if necessary.

Third, we need to relax the Deps requirement on listPackage, since the
runtime package and its dependencies are always implicit dependencies.

This is a big step towards being able to obfuscate the runtime, as there
is now just one package left that we cannot obfuscate outside the runtime.

Updates #193.

											
										
										
											2 years ago
+											}
 											buf.WriteString(newName)
 										} else {
-												obfuscate Go names in asm header files

Assembly files can include header files within the same Go module,
and those header files can include "defines" which refer to Go names.

Since those Go names are likely being obfuscated,
we need to replace them just like we do in assembly files.

The added mechanism is rather basic; we add two TODOs to improve it.
This should help when building projects like go-ethereum.

Fixes #553.

											
										
										
											2 years ago
+											buf.WriteString(name)
 										}
 									}
 								}
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+								// writeSourceFile is a mix between os.CreateTemp and os.WriteFile, as it writes a
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
+								// named source file in sharedTempDir given an input buffer.
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+								//
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
+								// Note that the file is created under a directory tree following curPkg's
 								// import path, mimicking how files are laid out in modules and GOROOT.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func (tf *transformer) writeSourceFile(basename, obfuscated string, content []byte) (string, error) {
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+									// Uncomment for some quick debugging. Do not delete.
 									// fmt.Fprintf(os.Stderr, "\n-- %s/%s --\n%s", curPkg.ImportPath, basename, content)
 									if flagDebugDir != "" {
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										pkgDir := filepath.Join(flagDebugDir, filepath.FromSlash(tf.curPkg.ImportPath))
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+										if err := os.MkdirAll(pkgDir, 0o755); err != nil {
 											return "", err
 										}
 										dstPath := filepath.Join(pkgDir, basename)
 										if err := os.WriteFile(dstPath, content, 0o666); err != nil {
 											return "", err
 										}
 									}
-												obfuscate all assembly filenames

We were still leaking the filenames for assembly files.
In our existing asm.txtar test's output binary,
the string `test/main/garble_main_amd64.s` was present.
This leaked full import paths on one hand,
and the filenames of each assembly file on the other.

We avoid this in Go files by using `/*line` directives,
but those are not supported in assembly files.
Instead, obfuscate the paths in the temporary directory.
Note that we still need a separate temporary directory per package,
because otherwise any included header files might collide.

We must remove the `main` package panic in obfuscatedImportPath,
as we now need to use that function for all packages.

While here, remove the outdated comment about `-trimpath`.

Fixes #605.

											
										
										
											2 years ago
+									// We use the obfuscated import path to hold the temporary files.
 									// Assembly files do not support line directives to set positions,
 									// so the only way to not leak the import path is to replace it.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									pkgDir := filepath.Join(sharedTempDir, tf.curPkg.obfuscatedImportPath())
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
+									if err := os.MkdirAll(pkgDir, 0o777); err != nil {
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+										return "", err
 									}
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+									dstPath := filepath.Join(pkgDir, obfuscated)
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+									if err := writeFileExclusive(dstPath, content); err != nil {
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+										return "", err
 									}
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
+									return dstPath, nil
-												replace the -p path when assembling (#247)

The asm tool runs twice for a package with assembly. The second time it
does, the path given to the -p flag matters, just like in the compiler,
as we generate an object file.

We don't have a -buildid flag in the asm tool, so obtaining the action
ID to obfuscate the package path with is a bit tricky. We store it from
transformCompile, and read it from transformAsm. See the detailed docs
for more.

This was the last "skip" line in the tests due to Go 1.16. After all PRs
are merged, one last PR documenting that 1.16 is supported will be sent,
closing the issue for good.

It's unclear why this wasn't an issue in Go 1.15. My best guess is that
the ABI changes only happened in Go 1.16, and this causes exported asm
funcs to start showing up in object files with their package paths.

Updates #124.
											
										
										
											3 years ago
+								}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+								// parseFiles parses a list of Go files.
 								// It supports relative file paths, such as those found in listedPackage.CompiledGoFiles,
 								// as long as dir is set to listedPackage.Dir.
 								func parseFiles(dir string, paths []string) ([]*ast.File, error) {
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+									var files []*ast.File
 									for _, path := range paths {
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+										if !filepath.IsAbs(path) {
 											path = filepath.Join(dir, path)
 										}
-												use go/parser.SkipObjectResolution

We don't use go/ast.Objects, as we use go/types instead.
Avoiding this work saves a bit of CPU and memory allocs.

	name      old time/op         new time/op         delta
	Build-16          10.2s ± 1%          10.2s ± 1%    ~     (p=0.937 n=6+6)

	name      old bin-B           new bin-B           delta
	Build-16          5.47M ± 0%          5.47M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          328ms ±14%          321ms ± 6%    ~     (p=0.589 n=6+6)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          34.8M ± 0%          34.0M ± 0%  -2.26%  (p=0.010 n=6+4)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.89s ± 3%          5.89s ± 3%    ~     (p=0.937 n=6+6)

See golang/go#52463.

											
										
										
											2 years ago
+										file, err := parser.ParseFile(fset, path, nil, parser.SkipObjectResolution|parser.ParseComments)
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+										if err != nil {
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+											return nil, err
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+										}
 										files = append(files, file)
 									}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+									return files, nil
 								}
 								func (tf *transformer) transformCompile(args []string) ([]string, error) {
 									flags, paths := splitFlagsFromFiles(args, ".go")
 									// We will force the linker to drop DWARF via -w, so don't spend time
 									// generating it.
 									flags = append(flags, "-dwarf=false")
 									// The Go file paths given to the compiler are always absolute paths.
 									files, err := parseFiles("", paths)
 									if err != nil {
 										return nil, err
 									}
-												tweak when we read and write cachedOutput files

We first called the typecheck method, which starts filling cachedOutput
with information from the current package, and later we would load the
gob files for all dependencies via loadCachedOutputs.

This was a bit confusing; instead, load the cached gob files first,
and then do all the operations which fill information for curPkg.

Similarly, we were waiting until the very end of transformCompile to
write curPkg's cachedOutput gob file to the disk cache.
We can write the file at an earlier point, before we have obfuscated and
re-printed all Go files for the current package.
We can also write the file before other work like processImportCfg.

None of these changes should affect garble's behavior,
but they will make the cache redesign for #708 easier.

											
										
										
											1 year ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									// Literal and control flow obfuscation uses math/rand, so seed it deterministically.
 									randSeed := tf.curPkg.GarbleActionID[:]
 									if flagSeed.present() {
 										randSeed = flagSeed.bytes
 									}
 									// log.Printf("seeding math/rand with %x\n", randSeed)
 									tf.obfRand = mathrand.New(mathrand.NewSource(int64(binary.BigEndian.Uint64(randSeed))))
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+									// Even if loadPkgCache below finds a direct cache hit,
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									// other parts of garble still need type information to obfuscate.
 									// We could potentially avoid this by saving the type info we need in the cache,
 									// although in general that wouldn't help much, since it's rare for Go's cache
 									// to miss on a package and for our cache to hit.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									if tf.pkg, tf.info, err = typecheck(tf.curPkg.ImportPath, files, tf.origImporter); err != nil {
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+										return nil, err
 									}
-												 rework reflection detection with ssa (#732)

This is significantly more robust, than the ast based detection and can
record very complex cases of indirect parameter reflection.

Fixes #554

											
										
										
											1 year ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									var (
 										ssaPkg       *ssa.Package
 										requiredPkgs []string
 									)
 									if flagControlFlow {
 										ssaPkg = ssaBuildPkg(tf.pkg, files, tf.info)
 										newFileName, newFile, affectedFiles, err := ctrlflow.Obfuscate(fset, ssaPkg, files, tf.obfRand)
 										if err != nil {
 											return nil, err
 										}
 										if newFile != nil {
 											files = append(files, newFile)
 											paths = append(paths, newFileName)
 											for _, file := range affectedFiles {
 												tf.useAllImports(file)
 											}
 											if tf.pkg, tf.info, err = typecheck(tf.curPkg.ImportPath, files, tf.origImporter); err != nil {
 												return nil, err
 											}
 											for _, imp := range newFile.Imports {
 												path, err := strconv.Unquote(imp.Path.Value)
 												if err != nil {
 													panic(err) // should never happen
 												}
 												requiredPkgs = append(requiredPkgs, path)
 											}
 										}
 									}
 									if tf.curPkgCache, err = loadPkgCache(tf.curPkg, tf.pkg, files, tf.info, ssaPkg); err != nil {
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+										return nil, err
 									}
-												tweak when we read and write cachedOutput files

We first called the typecheck method, which starts filling cachedOutput
with information from the current package, and later we would load the
gob files for all dependencies via loadCachedOutputs.

This was a bit confusing; instead, load the cached gob files first,
and then do all the operations which fill information for curPkg.

Similarly, we were waiting until the very end of transformCompile to
write curPkg's cachedOutput gob file to the disk cache.
We can write the file at an earlier point, before we have obfuscated and
re-printed all Go files for the current package.
We can also write the file before other work like processImportCfg.

None of these changes should affect garble's behavior,
but they will make the cache redesign for #708 easier.

											
										
										
											1 year ago
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									// These maps are not kept in pkgCache, since they are only needed to obfuscate curPkg.
 									tf.fieldToStruct = computeFieldToStruct(tf.info)
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+									if flagLiterals {
-												various minor TODO cleanups

computeLinkerVariableStrinsg had an unusedargument.

Only skip obfuscating the name "FS" in the "embed" package.

The reflect methods no longer use the transformer receiver type,
so that TODO now feels unnecessary. Many methods need to be aware
of what the current types.Package is, and that seems reasonable.

We no longer use writeFileExclusive for our own cache on disk,
so the TODO about using locking or atomic writes is no longer relevant.

											
										
										
											1 year ago
+										if tf.linkerVariableStrings, err = computeLinkerVariableStrings(tf.pkg); err != nil {
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+											return nil, err
 										}
-												tweak when we read and write cachedOutput files

We first called the typecheck method, which starts filling cachedOutput
with information from the current package, and later we would load the
gob files for all dependencies via loadCachedOutputs.

This was a bit confusing; instead, load the cached gob files first,
and then do all the operations which fill information for curPkg.

Similarly, we were waiting until the very end of transformCompile to
write curPkg's cachedOutput gob file to the disk cache.
We can write the file at an earlier point, before we have obfuscated and
re-printed all Go files for the current package.
We can also write the file before other work like processImportCfg.

None of these changes should affect garble's behavior,
but they will make the cache redesign for #708 easier.

											
										
										
											1 year ago
+									}
 									flags = alterTrimpath(flags)
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									newImportCfg, err := tf.processImportCfg(flags, requiredPkgs)
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+									if err != nil {
 										return nil, err
 									}
-												concentrate and simplify "to obfuscate" logic

Back in the day, we used to call toObfuscate anytime we needed to know
whether a package should be obfuscated.
More recently, we started computing via the ToObfuscate field,
which then gets shared with all sub-processes via sharedCache.

We still had two places that directly called toObfuscate.
Replace those with ToObfuscate, and inline toObfuscate into shared.go.

obfuscatedImportPath is also a potential footgun for main packages.
Some use cases always want the original "main" package name,
such as for use in the compiler's "-p main" flag,
while other cases want the obfuscated package import path,
such as the entries in importcfg files.

Since each of these call sites handles the edge case well,
obfuscatedImportPath now panics on main packages to avoid any misuse.

Finally, test that we never leak main package paths via ldflags.txt.
We never did, but it's good to make sure.

Overall, this avoids confusion and trims the size of main.go a bit.

											
										
										
											2 years ago
+									// If this is a package to obfuscate, swap the -p flag with the new package path.
 									// We don't if it's the main package, as that just uses "-p main".
 									// We only set newPkgPath if we're obfuscating the import path,
 									// to replace the original package name in the package clause below.
-												refactor "current package" with TOOLEXEC_IMPORTPATH (#266)

Now that we've dropped support for Go 1.15.x, we can finally rely on
this environment variable for toolexec calls, present in Go 1.16.

Before, we had hacky ways of trying to figure out the current package's
import path, mostly from the -p flag. The biggest rough edge there was
that, for main packages, that was simply the package name, and not its
full import path.

To work around that, we had a restriction on a single main package, so
we could work around that issue. That restriction is now gone.

The new code is simpler, especially because we can set curPkg in a
single place for all toolexec transform funcs.

Since we can always rely on curPkg not being nil now, we can also start
reusing listedPackage.Private and avoid the majority of repeated calls
to isPrivate. The function is cheap, but still not free.

isPrivate itself can also get simpler. We no longer have to worry about
the "main" edge case. Plus, the sanity check for invalid package paths
is now unnecessary; we only got malformed paths from goobj2, and we now
require exact matches with the ImportPath field from "go list -json".

Another effect of clearing up the "main" edge case is that -debugdir now
uses the right directory for main packages. We also start using
consistent debugdir paths in the tests, for the sake of being easier to
read and maintain.

Finally, note that commandReverse did not need the extra call to "go
list -toolexec", as the "shared" call stored in the cache is enough. We
still call toolexecCmd to get said cache, which should probably be
simplified in a future PR.

While at it, replace the use of the "-std" compiler flag with the
Standard field from "go list -json".
											
										
										
											3 years ago
+									newPkgPath := ""
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									if tf.curPkg.Name != "main" && tf.curPkg.ToObfuscate {
 										newPkgPath = tf.curPkg.obfuscatedImportPath()
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										flags = flagSetValue(flags, "-p", newPkgPath)
 									}
-												always use the compiler's -dwarf=false flag (#96)

First, our original append line was completely ineffective; we never
used that "flags" slice again. Second, we only attempted to use the flag
when we obfuscated a package.

In fact, we never care about debugging information here, so for any
package we compile, we can add "-dwarf=false". At the moment, we compile
all packages, even if they aren't to be obfuscated, due to the lack of
access to the build cache.

As such, we save a significant amount of work. The numbers below were
obtained on a quiet machine with "go test -bench=. -benchtime=10x", six
times before and after the change.

	name     old time/op       new time/op       delta
	Build-8        2.06s ± 4%        1.87s ± 2%  -9.21%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        1.51s ± 2%        1.46s ± 1%  -3.12%  (p=0.004 n=6+5)

	name     old user-time/op  new user-time/op  delta
	Build-8        11.9s ± 2%        10.8s ± 1%  -8.71%  (p=0.002 n=6+6)

While at it, only do CI builds on pushes and PRs to the master branch,
so that my PRs created from the same repo don't trigger duplicate
builds.
											
										
										
											4 years ago
+									newPaths := make([]string, 0, len(files))
-												Fix calls to linkname functions (#314)


											
										
										
											3 years ago
-												obfuscate local names in linkname directives

Previously, the local name part of linkname directives were never
obfuscated. Now that we can obfuscate function names in Go assembly,
that limitation is required no longer.

Fixes #309

											
										
										
											3 years ago
+									for i, file := range files {
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+										basename := filepath.Base(paths[i])
 										log.Printf("obfuscating %s", basename)
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										if tf.curPkg.ImportPath == "runtime" {
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
+											if flagTiny {
 												// strip unneeded runtime code
 												stripRuntime(basename, file)
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+												tf.useAllImports(file)
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
+											}
 											if basename == "symtab.go" {
 												updateMagicValue(file, magicValue())
-												implement funcInfo.entryoff encryption

At linker stage, we now encrypt funcInfo.entryoff value with a simple algorithm (1 xor + 1 mul). 
This makes it harder to relate function metadata (e.g. name) to function itself in binary, almost without affecting performance.
											
										
										
											1 year ago
+												updateEntryOffset(file, entryOffKey())
-												patch and rebuild cmd/link to modify the magic value in pclntab

This value is hard-coded in the linker and written in a header.
We could rewrite the final binary, like we used to do with import paths,
but that would require once again maintaining libraries to do so.

Instead, we're now modifying the linker to do what we want.
It's not particularly hard, as every Go install has its source code,
and rebuilding a slightly modified linker only takes a few seconds at most.

Thanks to `go build -overlay`, we only need to copy the files we modify,
and right now we're just modifying one file in the toolchain.
We use a git patch, as the change is fairly static and small,
and the patch is easier to understand and maintain.

The other side of this change is in the runtime,
as it also hard-codes the magic value when loading information.
We modify the code via syntax trees in that case, like `-tiny` does,
because the change is tiny (one literal) and the affected lines of code
are modified regularly between major Go releases.

Since rebuilding a slightly modified linker can take a few seconds,
and Go's build cache does not cache linked binaries,
we keep our own cached version of the rebuilt binary in `os.UserCacheDir`.

The feature isn't perfect, and will be improved in the future.
See the TODOs about the added dependency on `git`,
or how we are currently only able to cache one linker binary at once.

Fixes #622.
											
										
										
											1 year ago
+											}
-												initial support for cgo

I'm sure that the added test case doesn't cover many edge cases, but
it's a start.

Fixes #12.

											
										
										
											4 years ago
+										}
-												rename func and update docs on handleDirectives

We've been obfuscating all linknamed names for a while now,
so the part in the docs about "recording" is no longer true.

All it does is transform the directives to use obfuscated names.
Give it a better name and rewrite the docs.

											
										
										
											1 year ago
+										tf.transformDirectives(file.Comments)
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+										file = tf.transformGoFile(file)
-												avoid breaking intrinsics when obfuscating names

We obfuscate import paths as well as their declared names.
The compiler treats some packages and APIs in special ways,
and the way it detects those is by looking at import paths and names.

In the past, we have avoided obfuscating some names like embed.FS or
reflect.Value.MethodByName for this reason. Otherwise,
go:embed or the linker's deadcode elimination might be broken.

This matching by path and name also happens with compiler intrinsics.
Intrinsics allow the compiler to rewrite some standard library calls
with small and efficient assembly, depending on the target GOARCH.
For example, math/bits.TrailingZeros32 gets replaced with ssa.OpCtz32,
which on amd64 may result in using the TZCNTL instruction.

We never noticed that we were breaking many of these intrinsics.
The intrinsics for funcs declared in the runtime and its dependencies
still worked properly, as we do not obfuscate those packages yet.
However, for other packages like math/bits and sync/atomic,
the intrinsics were being entirely disabled due to obfuscated names.

Skipping intrinsics is particularly bad for performance,
and it also leads to slightly larger binaries:

			 │      old      │                 new                 │
			 │     bin-B     │     bin-B      vs base              │
	Build-16   5.450Mi ± ∞ ¹   5.333Mi ± ∞ ¹  -2.15% (p=0.029 n=4)

Finally, the main reason we noticed that intrinsics were broken
is that apparently GOARCH=mips fails to link without them,
as some symbols end up being not defined at all.
This patch fixes builds for the MIPS family of architectures.

Rather than building and linking all of std for every GOARCH,
test that intrinsics work by asking the compiler to print which
intrinsics are being applied, and checking that math/bits gets them.

This fix is relatively unfortunate, as it means we stop obfuscating
about 120 function names and a handful of package paths.
However, fixing builds and intrinsics is much more important.
We can figure out better ways to deal with intrinsics in the future.

Fixes #646.

											
										
										
											1 year ago
+										// newPkgPath might be the original ImportPath in some edge cases like
 										// compilerIntrinsics; we don't want to use slashes in package names.
 										// TODO: when we do away with those edge cases, only check the string is
 										// non-empty.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										if newPkgPath != "" && newPkgPath != tf.curPkg.ImportPath {
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+											file.Name.Name = newPkgPath
 										}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										src, err := printFile(tf.curPkg, file)
-												rework the position obfuscator (#282)

First, rename line_obfuscator.go to position.go. We obfuscate filenames,
not just line numbers, and "obfuscator" is a bit redundant.

Second, use "/*line :x*/" comments rather than the "//line :x" form, as
the former allows us to insert them in any position without adding
unnecessary newlines. This will be important for changing the position
of call sites, which will be important for "garble reverse".

Third, do not rely on go/ast to remove and add comments. Since they are
free-floating, we can very easily end up with misplaced comments,
especially as the literal obfuscator heavily modifies the AST.

The new method prints and re-parses the file, to ensure all node
positions are consistent with a buffer, buf1. Then, we copy the contents
into a new buffer, buf2, while inserting the comments that we need.

The new method also modifies line numbers at the very end of obfuscating
a Go file, instead of at the very beginning. That's going to be more
robust long-term, as we will also obfuscate line numbers for any
additions or modifications to the AST.

Fourth, detachedDirectives is unnecessary, as we can accomplish the same
with two simple prefix matches.

Finally, this means we can stop using detachedComments entirely, as
printFile already inserts the comments we need.

For #5.
											
										
										
											3 years ago
+										if err != nil {
 											return nil, err
 										}
-												teach -debugdir to produce assembly files too

Up to this point, -debugdir only included obfuscated Go files.
Include assembly files and their headers as well.
While here, ensure that -debugdir supports the standard library too,
and that it behaves correctly with build tags as well.

Also use more consistent names for path strings, to clarify which are
only the basename, and which are the obfuscated version.

											
										
										
											2 years ago
+										// We hide Go source filenames via "//line" directives,
 										// so there is no need to use obfuscated filenames here.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										if path, err := tf.writeSourceFile(basename, basename, src); err != nil {
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+											return nil, err
-												add a writeTemp helper func

We had two nearly identical copies of the code to open a temp file with
CreateTemp, write to it, and handle closing properly. Unify them.

With the added docs this isn't exactly a net win, but we want the long
funcs such as transformCompile to be easy to follow, and this helps.

											
										
										
											3 years ago
+										} else {
 											newPaths = append(newPaths, path)
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+										}
 									}
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									flags = flagSetValue(flags, "-importcfg", newImportCfg)
-												speed up builds with the compiler's -dwarf=false flag

Generating DWARF in the compiler object files could take as much as 10%
extra CPU time, while we ignore it entirely at the link stage.

Speeds up 'go test -short' from ~19.5s to ~18.5s on my laptop.

											
										
										
											4 years ago
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									return append(flags, newPaths...), nil
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+								}
-												rename func and update docs on handleDirectives

We've been obfuscating all linknamed names for a while now,
so the part in the docs about "recording" is no longer true.

All it does is transform the directives to use obfuscated names.
Give it a better name and rewrite the docs.

											
										
										
											1 year ago
+								// transformDirectives rewrites //go:linkname toolchain directives in comments
 								// to replace names with their obfuscated versions.
 								func (tf *transformer) transformDirectives(comments []*ast.CommentGroup) {
-												rework the position obfuscator (#282)

First, rename line_obfuscator.go to position.go. We obfuscate filenames,
not just line numbers, and "obfuscator" is a bit redundant.

Second, use "/*line :x*/" comments rather than the "//line :x" form, as
the former allows us to insert them in any position without adding
unnecessary newlines. This will be important for changing the position
of call sites, which will be important for "garble reverse".

Third, do not rely on go/ast to remove and add comments. Since they are
free-floating, we can very easily end up with misplaced comments,
especially as the literal obfuscator heavily modifies the AST.

The new method prints and re-parses the file, to ensure all node
positions are consistent with a buffer, buf1. Then, we copy the contents
into a new buffer, buf2, while inserting the comments that we need.

The new method also modifies line numbers at the very end of obfuscating
a Go file, instead of at the very beginning. That's going to be more
robust long-term, as we will also obfuscate line numbers for any
additions or modifications to the AST.

Fourth, detachedDirectives is unnecessary, as we can accomplish the same
with two simple prefix matches.

Finally, this means we can stop using detachedComments entirely, as
printFile already inserts the comments we need.

For #5.
											
										
										
											3 years ago
+									for _, group := range comments {
 										for _, comment := range group.List {
 											if !strings.HasPrefix(comment.Text, "//go:linkname ") {
 												continue
 											}
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
 											// We can have either just one argument:
 											//
 											//	//go:linkname localName
 											//
 											// Or two arguments, where the second may refer to a name in a
 											// different package:
 											//
 											//	//go:linkname localName newName
 											//	//go:linkname localName pkg.newName
-												rework the position obfuscator (#282)

First, rename line_obfuscator.go to position.go. We obfuscate filenames,
not just line numbers, and "obfuscator" is a bit redundant.

Second, use "/*line :x*/" comments rather than the "//line :x" form, as
the former allows us to insert them in any position without adding
unnecessary newlines. This will be important for changing the position
of call sites, which will be important for "garble reverse".

Third, do not rely on go/ast to remove and add comments. Since they are
free-floating, we can very easily end up with misplaced comments,
especially as the literal obfuscator heavily modifies the AST.

The new method prints and re-parses the file, to ensure all node
positions are consistent with a buffer, buf1. Then, we copy the contents
into a new buffer, buf2, while inserting the comments that we need.

The new method also modifies line numbers at the very end of obfuscating
a Go file, instead of at the very beginning. That's going to be more
robust long-term, as we will also obfuscate line numbers for any
additions or modifications to the AST.

Fourth, detachedDirectives is unnecessary, as we can accomplish the same
with two simple prefix matches.

Finally, this means we can stop using detachedComments entirely, as
printFile already inserts the comments we need.

For #5.
											
										
										
											3 years ago
+											fields := strings.Fields(comment.Text)
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+											localName := fields[1]
 											newName := ""
 											if len(fields) == 3 {
 												newName = fields[2]
-												rework the position obfuscator (#282)

First, rename line_obfuscator.go to position.go. We obfuscate filenames,
not just line numbers, and "obfuscator" is a bit redundant.

Second, use "/*line :x*/" comments rather than the "//line :x" form, as
the former allows us to insert them in any position without adding
unnecessary newlines. This will be important for changing the position
of call sites, which will be important for "garble reverse".

Third, do not rely on go/ast to remove and add comments. Since they are
free-floating, we can very easily end up with misplaced comments,
especially as the literal obfuscator heavily modifies the AST.

The new method prints and re-parses the file, to ensure all node
positions are consistent with a buffer, buf1. Then, we copy the contents
into a new buffer, buf2, while inserting the comments that we need.

The new method also modifies line numbers at the very end of obfuscating
a Go file, instead of at the very beginning. That's going to be more
robust long-term, as we will also obfuscate line numbers for any
additions or modifications to the AST.

Fourth, detachedDirectives is unnecessary, as we can accomplish the same
with two simple prefix matches.

Finally, this means we can stop using detachedComments entirely, as
printFile already inserts the comments we need.

For #5.
											
										
										
											3 years ago
+											}
-												all: use better names than "blacklist", and docs (#206)

The three transformer map fields are now very well documented, which was
badly needed for anyone trying to understand the source code.

ignoreObjects is also a better field name than blacklist, as it says
what the map is indexed by (types.Object) and what we do with those:
ignore them when we obfuscate code.

The rewriting of go:linkname directives is moved to a separate func, so
that we can name that func from the docs.

Finally, the docs are overall improved a bit, as I was re-tracing all
the pieces of code that used the ambiguous "blacklist" terminology.

Fixes #169.
											
										
										
											3 years ago
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+											localName, newName = tf.transformLinkname(localName, newName)
 											fields[1] = localName
 											if len(fields) == 3 {
 												fields[2] = newName
-												Obfuscate more packages of the standard library (#312)

Also update linkname directives of public packages,
to allow the package where something is linknamed to to be
obfuscated regardless.

Public packages can now depend on private packages.
											
										
										
											3 years ago
+											}
-												all: use better names than "blacklist", and docs (#206)

The three transformer map fields are now very well documented, which was
badly needed for anyone trying to understand the source code.

ignoreObjects is also a better field name than blacklist, as it says
what the map is indexed by (types.Object) and what we do with those:
ignore them when we obfuscate code.

The rewriting of go:linkname directives is moved to a separate func, so
that we can name that func from the docs.

Finally, the docs are overall improved a bit, as I was re-tracing all
the pieces of code that used the ambiguous "blacklist" terminology.

Fixes #169.
											
										
										
											3 years ago
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+											if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
 												log.Printf("linkname %q changed to %q", comment.Text, strings.Join(fields, " "))
-												ensure the runtime is built in a reproducible way

We went to great lengths to ensure garble builds are reproducible.
This includes how the tool itself works,
as its behavior should be the same given the same inputs.

However, we made one crucial mistake with the runtime package.
It has go:linkname directives pointing at other packages,
and some of those pointed packages aren't its dependencies.

Imagine two scenarios where garble builds the runtime package:

1) We run "garble build runtime". The way we handle linkname directives
   calls listPackage on the target package, to obfuscate the target's
   import path and object name. However, since we only obtained build
   info of runtime and its deps, calls for some linknames such as
   listPackage("sync/atomic") will fail. The linkname directive will
   leave its target untouched.

2) We run "garble build std". Unlike the first scenario, all listPackage
   calls issued by runtime's linkname directives will succeed, so its
   linkname directive targets will be obfuscated.

At best, this can result in inconsistent builds, depending on how the
runtime package was built. At worst, the mismatching object names can
result in errors at link time, if the target packages are actually used.

The modified test reproduces the worst case scenario reliably,
when the fix is reverted:

	> env GOCACHE=${WORK}/gocache-empty
	> garble build -a runtime
	> garble build -o=out_rebuild ./stdimporter
	[stderr]
	# test/main/stdimporter
	JZzQivnl.NtQJu0H3: relocation target JZzQivnl.iioHinYT not defined
	JZzQivnl.NtQJu0H3.func9: relocation target JZzQivnl.yz5z0NaH not defined
	JZzQivnl.(*ypvqhKiQ).String: relocation target JZzQivnl.eVciBQeI not defined
	JZzQivnl.(*ypvqhKiQ).PkgPath: relocation target JZzQivnl.eVciBQeI not defined
	[...]

The fix consists of two steps. First, if we're building the runtime and
listPackage fails on a package, that means we ran into scenario 1 above.
To avoid the inconsistency, we fill ListedPackages with "go list [...] std".
This means we'll always build runtime as described in scenario 2 above.

Second, when building packages other than the runtime,
we only allow listPackage to succeed if we're listing a dependency of
the current package.
This ensures we won't run into similar reproducibility bugs in the future.

Finally, re-enable test-gotip on CI since this was the last test flake.

											
										
										
											2 years ago
+											}
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+											comment.Text = strings.Join(fields, " ")
 										}
 									}
 								}
-												fix obfuscating linkname directives that where the package name contained a dot

											
										
										
											3 years ago
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+								func (tf *transformer) transformLinkname(localName, newName string) (string, string) {
 									// obfuscate the local name, if the current package is obfuscated
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									if tf.curPkg.ToObfuscate && !compilerIntrinsicsFuncs[tf.curPkg.ImportPath+"."+localName] {
 										localName = hashWithPackage(tf.curPkg, localName)
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+									}
 									if newName == "" {
 										return localName, ""
 									}
 									// If the new name is of the form "pkgpath.Name", and we've obfuscated
 									// "Name" in that package, rewrite the directive to use the obfuscated name.
 									dotCnt := strings.Count(newName, ".")
 									if dotCnt < 1 {
 										// cgo-generated code uses linknames to made up symbol names,
 										// which do not have a package path at all.
 										// Replace the comment in case the local name was obfuscated.
 										return localName, newName
 									}
 									switch newName {
 									case "main.main", "main..inittask", "runtime..inittask":
 										// The runtime uses some special symbols with "..".
 										// We aren't touching those at the moment.
 										return localName, newName
 									}
-												fix obfuscating linkname directives that where the package name contained a dot

											
										
										
											3 years ago
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+									pkgSplit := 0
 									var lpkg *listedPackage
 									var foreignName string
 									for {
 										i := strings.Index(newName[pkgSplit:], ".")
 										if i < 0 {
 											// We couldn't find a prefix that matched a known package.
-												default to GOGARBLE=*, stop using GOPRIVATE

We can drop the code that kicked in when GOGARBLE was empty.
We can also add the value in addGarbleToHash unconditionally,
as we never allow it to be empty.

In the tests, remove all GOGARBLE lines where it just meant "obfuscate
everything" or "obfuscate the entire main module".

cgo.txtar had "obfuscate everything" as a separate step,
so remove it entirely.

linkname.txtar started failing because the imported package did not
import strings, so listPackage errored out. This wasn't a problem when
strings itself wasn't obfuscated, as transformLinkname silently left
strings.IndexByte untouched. It is a problem when IndexByte does get
obfuscated. Make that kind of listPackage error visible, and fix it.

reflect.txtar started failing with "unreachable method" runtime throws.
It's not clear to me why; it appears that GOGARBLE=* makes the linker
think that ExportedMethodName is suddenly unreachable.
Work around the problem by making the method explicitly reachable,
and leave a TODO as a reminder to investigate.

Finally, gogarble.txtar no longer needs to test for GOPRIVATE.
The rest of the test is left the same, as we still want the various
values for GOGARBLE to continue to work just like before.

Fixes #594.

											
										
										
											2 years ago
+											// Probably a made up name like above, but with a dot.
 											return localName, newName
 										}
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+										pkgSplit += i
 										pkgPath := newName[:pkgSplit]
 										pkgSplit++ // skip over the dot
-												simplify our handling of "go list" errors

First, teach scripts/gen-go-std-tables.sh to omit test packages,
since runtime/metrics_test would always result in an error.
Instead, make transformLinkname explicitly skip that package,
leaving a comment about a potential improvement if needed.

Second, the only remaining "not found" error we had was "maps" on 1.20,
so rewrite that check based on ImportPath and GoVersionSemver.

Third, detect packages with the "exclude all Go files" error
by looking at CompiledGoFiles and IgnoredGoFiles, which is less brittle.
This means that we are no longer doing any filtering on pkg.Error.Err,
which means we are less likely to break with Go error message changes.

Fourth, the check on pkg.Incomplete is now obsolete given the above,
meaning that the CompiledGoFiles length check is plenty.

Finally, stop trying to be clever about how we print errors.
Now that we're no longer skipping packages based on pkg.Error values,
printing pkg.DepsErrors was causing duplicate messages in the output.
Simply print pkg.Error values with only minimal tweaks:
including the position if there is any, and avoiding double newlines.

Overall, this makes our logic a lot less complicated,
and garble still works the way we want it to.

											
										
										
											1 year ago
+										if strings.HasSuffix(pkgPath, "_test") {
 											// runtime uses a go:linkname to metrics_test;
 											// we don't need this to work for now on regular builds,
 											// though we might need to rethink this if we want "go test std" to work.
 											continue
 										}
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+										var err error
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg, err = listPackage(tf.curPkg, pkgPath)
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+										if err == nil {
 											foreignName = newName[pkgSplit:]
 											break
 										}
 										if errors.Is(err, ErrNotFound) {
 											// No match; find the next dot.
 											continue
 										}
-												use errors.Is for listPackage errors

											
										
										
											1 year ago
+										if errors.Is(err, ErrNotDependency) {
-												default to GOGARBLE=*, stop using GOPRIVATE

We can drop the code that kicked in when GOGARBLE was empty.
We can also add the value in addGarbleToHash unconditionally,
as we never allow it to be empty.

In the tests, remove all GOGARBLE lines where it just meant "obfuscate
everything" or "obfuscate the entire main module".

cgo.txtar had "obfuscate everything" as a separate step,
so remove it entirely.

linkname.txtar started failing because the imported package did not
import strings, so listPackage errored out. This wasn't a problem when
strings itself wasn't obfuscated, as transformLinkname silently left
strings.IndexByte untouched. It is a problem when IndexByte does get
obfuscated. Make that kind of listPackage error visible, and fix it.

reflect.txtar started failing with "unreachable method" runtime throws.
It's not clear to me why; it appears that GOGARBLE=* makes the linker
think that ExportedMethodName is suddenly unreachable.
Work around the problem by making the method explicitly reachable,
and leave a TODO as a reminder to investigate.

Finally, gogarble.txtar no longer needs to test for GOPRIVATE.
The rest of the test is left the same, as we still want the various
values for GOGARBLE to continue to work just like before.

Fixes #594.

											
										
										
											2 years ago
+											fmt.Fprintf(os.Stderr,
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+												"//go:linkname refers to %s - add `import _ %q` for garble to find the package",
-												default to GOGARBLE=*, stop using GOPRIVATE

We can drop the code that kicked in when GOGARBLE was empty.
We can also add the value in addGarbleToHash unconditionally,
as we never allow it to be empty.

In the tests, remove all GOGARBLE lines where it just meant "obfuscate
everything" or "obfuscate the entire main module".

cgo.txtar had "obfuscate everything" as a separate step,
so remove it entirely.

linkname.txtar started failing because the imported package did not
import strings, so listPackage errored out. This wasn't a problem when
strings itself wasn't obfuscated, as transformLinkname silently left
strings.IndexByte untouched. It is a problem when IndexByte does get
obfuscated. Make that kind of listPackage error visible, and fix it.

reflect.txtar started failing with "unreachable method" runtime throws.
It's not clear to me why; it appears that GOGARBLE=* makes the linker
think that ExportedMethodName is suddenly unreachable.
Work around the problem by making the method explicitly reachable,
and leave a TODO as a reminder to investigate.

Finally, gogarble.txtar no longer needs to test for GOPRIVATE.
The rest of the test is left the same, as we still want the various
values for GOGARBLE to continue to work just like before.

Fixes #594.

											
										
										
											2 years ago
+												newName, pkgPath)
 											return localName, newName
 										}
-												remove duplicate err!=nil check

											
										
										
											1 year ago
+										panic(err) // shouldn't happen
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+									}
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
 									if !lpkg.ToObfuscate || compilerIntrinsicsFuncs[lpkg.ImportPath+"."+foreignName] {
 										// We're not obfuscating that package or name.
 										return localName, newName
 									}
 									var newForeignName string
 									if receiver, name, ok := strings.Cut(foreignName, "."); ok {
-												avoid breaking github.com/davecgh/go-spew

It assumes that reflect.Value has a field named "flag",
which wasn't the case with obfuscated builds as we obfuscated it.

We already treated the reflect package as special,
for instance when not obfuscating Method or MethodByName.
In a similar fashion, mark reflect's rtype and Value types to not be
obfuscated alongside their field names. Note that rtype is the
implementation behind the reflect.Type interface.

This fix is fairly manual and repetitive.
transformCompile, transformLinkname, and transformAsm should all
use the same mechanism to tell if names should be obfuscated.
However, they do not do that right now, and that refactor feels too
risky for a bugfix release. We add more TODOs instead.

We're not adding go-spew to scripts/check-third-party.sh since the
project is largely abandoned. It's not even a Go module yet.
The only broken bit from it is what we've added to our tests.

Fixes #676.

											
										
										
											1 year ago
+										if lpkg.ImportPath == "reflect" && (receiver == "(*rtype)" || receiver == "Value") {
 											// These receivers are not obfuscated.
 											// See the TODO below.
 										} else if strings.HasPrefix(receiver, "(*") {
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+											// pkg/path.(*Receiver).method
 											receiver = strings.TrimPrefix(receiver, "(*")
 											receiver = strings.TrimSuffix(receiver, ")")
 											receiver = "(*" + hashWithPackage(lpkg, receiver) + ")"
 										} else {
 											// pkg/path.Receiver.method
 											receiver = hashWithPackage(lpkg, receiver)
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										}
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+										// Exported methods are never obfuscated.
 										//
-												avoid breaking github.com/davecgh/go-spew

It assumes that reflect.Value has a field named "flag",
which wasn't the case with obfuscated builds as we obfuscated it.

We already treated the reflect package as special,
for instance when not obfuscating Method or MethodByName.
In a similar fashion, mark reflect's rtype and Value types to not be
obfuscated alongside their field names. Note that rtype is the
implementation behind the reflect.Type interface.

This fix is fairly manual and repetitive.
transformCompile, transformLinkname, and transformAsm should all
use the same mechanism to tell if names should be obfuscated.
However, they do not do that right now, and that refactor feels too
risky for a bugfix release. We add more TODOs instead.

We're not adding go-spew to scripts/check-third-party.sh since the
project is largely abandoned. It's not even a Go module yet.
The only broken bit from it is what we've added to our tests.

Fixes #676.

											
										
										
											1 year ago
+										// TODO(mvdan): We're duplicating the logic behind these decisions.
 										// Reuse the logic with transformCompile.
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+										if !token.IsExported(name) {
 											name = hashWithPackage(lpkg, name)
 										}
 										newForeignName = receiver + "." + name
 									} else {
 										// pkg/path.function
 										newForeignName = hashWithPackage(lpkg, foreignName)
 									}
 									newPkgPath := lpkg.ImportPath
 									if newPkgPath != "main" {
 										newPkgPath = lpkg.obfuscatedImportPath()
-												all: use better names than "blacklist", and docs (#206)

The three transformer map fields are now very well documented, which was
badly needed for anyone trying to understand the source code.

ignoreObjects is also a better field name than blacklist, as it says
what the map is indexed by (types.Object) and what we do with those:
ignore them when we obfuscate code.

The rewriting of go:linkname directives is moved to a separate func, so
that we can name that func from the docs.

Finally, the docs are overall improved a bit, as I was re-tracing all
the pieces of code that used the ambiguous "blacklist" terminology.

Fixes #169.
											
										
										
											3 years ago
+									}
-												support go:linkname directives pointing at methods

This is not common, but it is done by a few projects.
Namely, github.com/goccy/go-json reached into reflect's guts,
which included a number of methods:

	internal/runtime/rtype.go
	11://go:linkname rtype_Align reflect.(*rtype).Align
	19://go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
	27://go:linkname rtype_Method reflect.(*rtype).Method
	35://go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
	[...]

Add tests for such go:linkname directives pointing at methods.
Note that there are two possible symbol string variants;
"pkg/path.(*Receiver).method" for methods with pointer receivers,
and "pkg/path.Receiver.method" for the rest.

We can't assume that the presence of two dots means a method either.
For example, a package path may be "pkg/path.with.dots",
and so "pkg/path.with.dots.SomeFunc" is the function "SomeFunc"
rather than the method "SomeFunc" on a type "dots".
To account for this ambiguity, rather than splitting on the last dot
like we used to, try to find a package path prefix by splitting on an
increasing number of first dots.

This can in theory still be ambiguous. For example,
we could have the package "pkg/path" expose the method "foo.bar",
and the package "pkg/path.foo" expose the func "bar".
Then, the symbol string "pkg/path.foo.bar" could mean either of them.
However, this seems extremely unlikely to happen in practice,
and I'm not sure that Go's toolchain would support it either.

I also noticed that goccy/go-json still failed to build after the fix.
The reason was that the type reflect.rtype wasn't being obfuscated.
We could, and likely should, teach our assembly and linkname
transformers about which names we chose not to obfuscate due to the use
of reflection. However, in this particular case, reflect's own types
can be obfuscated safely, so just do that.

Fixes #656.

											
										
										
											1 year ago
+									newName = newPkgPath + "." + newForeignName
-												support obfuscating the syscall package

One more package that further unblocks obfuscating the runtime.
The issue was the TODO we already had about go:linkname directives with
just one argument, which are used in the syscall package.

While here, factor out the obfuscation of linkname directives into
transformLinkname, as it was starting to get a bit complex.
We now support debug logging as well, while still being able to use
"early returns" for some cases where we bail out.

We also need listPackage to treat all runtime sub-packages like it does
runtime itself, as `runtime/internal/syscall` linknames into `syscall`
without it being a dependency as well.

Finally, add a regression test that, without the fix,
properly spots that the syscall package was not obfuscated:

	FAIL: testdata/script/gogarble.txtar:41: unexpected match for ["syscall.RawSyscall6"] in out

Updates #193.

											
										
										
											2 years ago
+									return localName, newName
-												all: use better names than "blacklist", and docs (#206)

The three transformer map fields are now very well documented, which was
badly needed for anyone trying to understand the source code.

ignoreObjects is also a better field name than blacklist, as it says
what the map is indexed by (types.Object) and what we do with those:
ignore them when we obfuscate code.

The rewriting of go:linkname directives is moved to a separate func, so
that we can name that func from the docs.

Finally, the docs are overall improved a bit, as I was re-tracing all
the pieces of code that used the ambiguous "blacklist" terminology.

Fixes #169.
											
										
										
											3 years ago
+								}
-												stop loading obfuscated type information from deps

If package P1 imports package P2, P1 needs to know which names from P2
weren't obfuscated. For instance, if P2 declares T2 and does
"reflect.TypeOf(T2{...})", then P2 won't obfuscate the name T2, and
neither should P1.

This information should flow from P2 to P1, as P2 builds before
P1. We do this via obfuscatedTypesPackage; P1 loads the type information
of the obfuscated version of P2, and does a lookup for T2. If T2 exists,
then it wasn't obfuscated.

This mechanism has served us well, but it has downsides:

1) It wastes CPU; we load the type information for the entire package.

2) It's complex; for instance, we need KnownObjectFiles as an extra.

3) It makes our code harder to understand, as we load both the original
   and obfuscated type informaiton.

Instead, we now have each package record what names were not obfuscated
as part of its cachedOuput file. Much like KnownObjectFiles, the map
records incrementally through the import graph, to avoid having to load
cachedOutput files for indirect dependencies.

We shouldn't need to worry about those maps getting large;
we only skip obfuscating declared names in a few uncommon scenarios,
such as the use of reflection or cgo's "//export".

Since go/types is relatively allocation-heavy, and the export files
contain a lot of data, we get a nice speed-up:

	name      old time/op         new time/op         delta
	Build-16          11.5s ± 2%          11.1s ± 3%  -3.77%  (p=0.008 n=5+5)

	name      old bin-B           new bin-B           delta
	Build-16          5.15M ± 0%          5.15M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          375ms ± 3%          341ms ± 6%  -8.96%  (p=0.008 n=5+5)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          283ms ±17%          289ms ±13%    ~     (p=0.841 n=5+5)

	name      old user-time/op    new user-time/op    delta
	Build-16          687ms ± 6%          664ms ± 7%    ~     (p=0.548 n=5+5)

Fixes #456.
Updates #475.

											
										
										
											2 years ago
+								// processImportCfg parses the importcfg file passed to a compile or link step.
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+								// It also builds a new importcfg file to account for obfuscated import paths.
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+								func (tf *transformer) processImportCfg(flags []string, requiredPkgs []string) (newImportCfg string, _ error) {
-												start using original action IDs (#251)

When we obfuscate a name, what we do is hash the name with the action ID
of the package that contains the name. To ensure that the hash changes
if the garble tool changes, we used the action ID of the obfuscated
build, which is different than the original action ID, as we include
garble's own content ID in "go tool compile -V=full" via -toolexec.

Let's call that the "obfuscated action ID". Remember that a content ID
is roughly the hash of a binary or object file, and an action ID
contains the hash of a package's source code plus the content IDs of its
dependencies.

This had the advantage that it did what we wanted. However, it had one
massive drawback: when we compile a package, we only have the obfuscated
action IDs of its dependencies. This is because one can't have the
content ID of dependent packages before they are built.

Usually, this is not a problem, because hashing a foreign name means it
comes from a dependency, where we already have the obfuscated action ID.
However, that's not always the case.

First, go:linkname directives can point to any symbol that ends up in
the binary, even if the package is not a dependency. So garble could
only support linkname targets belonging to dependencies. This is at the
root of why we could not obfuscate the runtime; it contains linkname
directives targeting the net package, for example, which depends on runtime.

Second, some other places did not have an easy access to obfuscated
action IDs, like transformAsm, which had to recover it from a temporary
file stored by transformCompile.

Plus, this was all pretty expensive, as each toolexec sub-process had to
make repeated calls to buildidOf with the object files of dependencies.
We even had to use extra calls to "go list" in the case of indirect
dependencies, as their export files do not appear in importcfg files.

All in all, the old method was complex and expensive. A better mechanism
is to use the original action IDs directly, as listed by "go list"
without garble in the picture.

This would mean that the hashing does not change if garble changes,
meaning weaker obfuscation. To regain that property, we define the
"garble action ID", which is just the original action ID hashed together
with garble's own content ID.

This is practically the same as the obfuscated build ID we used before,
but since it doesn't go through "go tool compile -V=full" and the
obfuscated build itself, we can work out *all* the garble action IDs
upfront, before the obfuscated build even starts.

This fixes all of our problems. Now we know all garble build IDs
upfront, so a bunch of hacks can be entirely removed. Plus, since we
know them upfront, we can also cache them and avoid repeated calls to
"go tool buildid".

While at it, make use of the new BuildID field in Go 1.16's "list -json
-export". This avoids the vast majority of "go tool buildid" calls, as
the only ones that remain are 2 on the garble binary itself.

The numbers for Go 1.16 look very good:

	name     old time/op       new time/op       delta
	Build-8        146ms ± 4%        101ms ± 1%  -31.01%  (p=0.002 n=6+6)

	name     old bin-B         new bin-B         delta
	Build-8        6.61M ± 0%        6.60M ± 0%   -0.09%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        321ms ± 7%        202ms ± 6%  -37.11%  (p=0.002 n=6+6)

	name     old user-time/op  new user-time/op  delta
	Build-8        538ms ± 4%        414ms ± 4%  -23.12%  (p=0.002 n=6+6)
											
										
										
											3 years ago
+									importCfg := flagValue(flags, "-importcfg")
 									if importCfg == "" {
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										return "", fmt.Errorf("could not find -importcfg argument")
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+									}
-												all: replace uses of the deprecated ioutil

Now that we require Go 1.16, we can simplify code by removing ioutil.

											
										
										
											3 years ago
+									data, err := os.ReadFile(importCfg)
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+									if err != nil {
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										return "", err
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+									}
-												Use latest Binject/debug version to support importmap directives, fixes #146 (#189)

* Use latest Binject/debug version to support importmap directives in the importcfg file

* Uncomment line in goprivate testscript to test ImportMap

* Fixed issue where a package in specified in importmap would be hashed differently in a package that imported it, due to the mapping of import paths.

Also commented out the 'net' import in the goprivate testscript (again) due to cgo compile errors
											
										
										
											4 years ago
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+									var packagefiles, importmaps [][2]string
-												avoid one more call to 'go tool buildid' (#253)

We use it to get the content ID of garble's binary, which is used for
both the garble action IDs, as well as 'go tool compile -V=full'.

Since those two happen in separate processes, both used to call 'go tool
buildid' separately. Store it in the gob cache the first time, and reuse
it the second time.

Since each call to cmd/go costs about 10ms (new process, running its
many init funcs, etc), this results in a nice speed-up for our small
benchmark. Most builds will take many seconds though, so note that a
~15ms speedup there will likely not be noticeable.

While at it, simplify the buildInfo global, as now it just contains a
map representation of the -importcfg contents. It now has better names,
docs, and a simpler representation.

We also stop using the term "garbled import", as it was a bit confusing.
"obfuscated types.Package" is a much better description.

	name     old time/op       new time/op       delta
	Build-8        106ms ± 1%         92ms ± 0%  -14.07%  (p=0.010 n=6+4)

	name     old bin-B         new bin-B         delta
	Build-8        6.60M ± 0%        6.60M ± 0%   -0.01%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        208ms ± 5%        149ms ± 3%  -28.27%  (p=0.004 n=6+5)

	name     old user-time/op  new user-time/op  delta
	Build-8        433ms ± 3%        384ms ± 3%  -11.35%  (p=0.002 n=6+6)
											
										
										
											3 years ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									// using for track required but not imported packages
 									var newIndirectImports map[string]bool
 									if requiredPkgs != nil {
 										newIndirectImports = make(map[string]bool)
 										for _, pkg := range requiredPkgs {
 											newIndirectImports[pkg] = true
 										}
 									}
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+									for _, line := range strings.Split(string(data), "\n") {
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+										if line == "" || strings.HasPrefix(line, "#") {
-												start hashing identifiers

											
										
										
											5 years ago
+											continue
 										}
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										verb, args, found := strings.Cut(line, " ")
 										if !found {
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+											continue
 										}
-												Use latest Binject/debug version to support importmap directives, fixes #146 (#189)

* Use latest Binject/debug version to support importmap directives in the importcfg file

* Uncomment line in goprivate testscript to test ImportMap

* Fixed issue where a package in specified in importmap would be hashed differently in a package that imported it, due to the mapping of import paths.

Also commented out the 'net' import in the goprivate testscript (again) due to cgo compile errors
											
										
										
											4 years ago
+										switch verb {
 										case "importmap":
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+											beforePath, afterPath, found := strings.Cut(args, "=")
 											if !found {
-												Use latest Binject/debug version to support importmap directives, fixes #146 (#189)

* Use latest Binject/debug version to support importmap directives in the importcfg file

* Uncomment line in goprivate testscript to test ImportMap

* Fixed issue where a package in specified in importmap would be hashed differently in a package that imported it, due to the mapping of import paths.

Also commented out the 'net' import in the goprivate testscript (again) due to cgo compile errors
											
										
										
											4 years ago
+												continue
 											}
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+											importmaps = append(importmaps, [2]string{beforePath, afterPath})
-												Use latest Binject/debug version to support importmap directives, fixes #146 (#189)

* Use latest Binject/debug version to support importmap directives in the importcfg file

* Uncomment line in goprivate testscript to test ImportMap

* Fixed issue where a package in specified in importmap would be hashed differently in a package that imported it, due to the mapping of import paths.

Also commented out the 'net' import in the goprivate testscript (again) due to cgo compile errors
											
										
										
											4 years ago
+										case "packagefile":
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+											importPath, objectPath, found := strings.Cut(args, "=")
 											if !found {
-												Use latest Binject/debug version to support importmap directives, fixes #146 (#189)

* Use latest Binject/debug version to support importmap directives in the importcfg file

* Uncomment line in goprivate testscript to test ImportMap

* Fixed issue where a package in specified in importmap would be hashed differently in a package that imported it, due to the mapping of import paths.

Also commented out the 'net' import in the goprivate testscript (again) due to cgo compile errors
											
										
										
											4 years ago
+												continue
 											}
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+											packagefiles = append(packagefiles, [2]string{importPath, objectPath})
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+											delete(newIndirectImports, importPath)
-												start hashing identifiers

											
										
										
											5 years ago
+										}
 									}
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
 									// Produce the modified importcfg file.
-												properly fix obfuscated imports with their package names (#245)

The TODO I left there didn't take long to surface as a bug. If the
package path ends with a word containing a hyphen, that's not a valid
identifier, so we end up with invalid Go syntax.

Add that test case, as well as one where an import was already named.

To fix the issue, we just need to use the package name we got from
'go list -json'.

Fixes #243.
											
										
										
											3 years ago
+									// This is mainly replacing the obfuscated paths.
 									// Note that we range over maps, so this is non-deterministic, but that
 									// should not matter as the file is treated like a lookup table.
-												all: replace uses of the deprecated ioutil

Now that we require Go 1.16, we can simplify code by removing ioutil.

											
										
										
											3 years ago
+									newCfg, err := os.CreateTemp(sharedTempDir, "importcfg")
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									if err != nil {
 										return "", err
 									}
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+									for _, pair := range importmaps {
 										beforePath, afterPath := pair[0], pair[1]
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg, err := listPackage(tf.curPkg, beforePath)
-												concentrate and simplify "to obfuscate" logic

Back in the day, we used to call toObfuscate anytime we needed to know
whether a package should be obfuscated.
More recently, we started computing via the ToObfuscate field,
which then gets shared with all sub-processes via sharedCache.

We still had two places that directly called toObfuscate.
Replace those with ToObfuscate, and inline toObfuscate into shared.go.

obfuscatedImportPath is also a potential footgun for main packages.
Some use cases always want the original "main" package name,
such as for use in the compiler's "-p main" flag,
while other cases want the obfuscated package import path,
such as the entries in importcfg files.

Since each of these call sites handles the edge case well,
obfuscatedImportPath now panics on main packages to avoid any misuse.

Finally, test that we never leak main package paths via ldflags.txt.
We never did, but it's good to make sure.

Overall, this avoids confusion and trims the size of main.go a bit.

											
										
										
											2 years ago
+										if err != nil {
-												don't panic when we can error as easily

Panicking in small helpers or in funcs that don't return error
has proved useful to keep code easier to maintain,
particularly for cases that should typically never happen.

However, in these cases we can error just as easily.
In particular, I was getting a panic whenever I forgot
that I was running garble with Go master (1.23), which is over the top.

											
										
										
											4 months ago
+											return "", err
-												concentrate and simplify "to obfuscate" logic

Back in the day, we used to call toObfuscate anytime we needed to know
whether a package should be obfuscated.
More recently, we started computing via the ToObfuscate field,
which then gets shared with all sub-processes via sharedCache.

We still had two places that directly called toObfuscate.
Replace those with ToObfuscate, and inline toObfuscate into shared.go.

obfuscatedImportPath is also a potential footgun for main packages.
Some use cases always want the original "main" package name,
such as for use in the compiler's "-p main" flag,
while other cases want the obfuscated package import path,
such as the entries in importcfg files.

Since each of these call sites handles the edge case well,
obfuscatedImportPath now panics on main packages to avoid any misuse.

Finally, test that we never leak main package paths via ldflags.txt.
We never did, but it's good to make sure.

Overall, this avoids confusion and trims the size of main.go a bit.

											
										
										
											2 years ago
+										}
 										if lpkg.ToObfuscate {
-												fix link errors when importing crypto/ecdsa (#262)

First, we had some link errors such as:

	cannot find package J6OzO8GN (using -importcfg)

This was caused by the code that writes an updated importcfg, which did
not handle import maps well. That code is now fixed, and we also add an
obfuscatedImportPath method for clarity.

Once fixed, we ran into other link errors:

	Pw3g97ww.addVW: relocation target Pw3g97ww.addVWlarge not defined

After some digging, the cause of those is assembly code that we do not
yet support obfuscating. #261 tracks that.

Meanwhile, to fix "GOPRIVATE=* garble build" and to be able to have a
test for the original import path bug, we add the packages which use
that form of assembly code to runtimeRelated - math/big and
crypto/sha512. There might be more, but these were the ones found by
trying to link crypto/tls, a fairly common dependency.

Fixes #256.
											
										
										
											3 years ago
+											// Note that beforePath is not the canonical path.
 											// For beforePath="vendor/foo", afterPath and
 											// lpkg.ImportPath can be just "foo".
 											// Don't use obfuscatedImportPath here.
-												make obfuscation fully deterministic with -seed

The default behavior of garble is to seed via the build inputs,
including the build IDs of the entire Go build of each package.
This works well as a default, and does give us determinism,
but it means that building for different platforms
will result in different obfuscation per platform.

Instead, when -seed is provided, don't use any other hash seed or salt.
This means that a particular Go name will be obfuscated the same way
as long as the seed, package path, and name itself remain constant.

In other words, when the user supplies a custom -seed,
we assume they know what they're doing in terms of storage and rotation.

Expand the README docs with more examples and detail.

Fixes #449.

											
										
										
											2 years ago
+											beforePath = hashWithPackage(lpkg, beforePath)
-												fix link errors when importing crypto/ecdsa (#262)

First, we had some link errors such as:

	cannot find package J6OzO8GN (using -importcfg)

This was caused by the code that writes an updated importcfg, which did
not handle import maps well. That code is now fixed, and we also add an
obfuscatedImportPath method for clarity.

Once fixed, we ran into other link errors:

	Pw3g97ww.addVW: relocation target Pw3g97ww.addVWlarge not defined

After some digging, the cause of those is assembly code that we do not
yet support obfuscating. #261 tracks that.

Meanwhile, to fix "GOPRIVATE=* garble build" and to be able to have a
test for the original import path bug, we add the packages which use
that form of assembly code to runtimeRelated - math/big and
crypto/sha512. There might be more, but these were the ones found by
trying to link crypto/tls, a fairly common dependency.

Fixes #256.
											
										
										
											3 years ago
 											afterPath = lpkg.obfuscatedImportPath()
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										}
 										fmt.Fprintf(newCfg, "importmap %s=%s\n", beforePath, afterPath)
 									}
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
 									if len(newIndirectImports) > 0 {
 										f, err := os.Open(filepath.Join(sharedTempDir, actionGraphFileName))
 										if err != nil {
 											return "", fmt.Errorf("cannot open action graph file: %v", err)
 										}
 										defer f.Close()
 										var actions []struct {
 											Mode    string
 											Package string
 											Objdir  string
 										}
 										if err := json.NewDecoder(f).Decode(&actions); err != nil {
 											return "", fmt.Errorf("cannot parse action graph file: %v", err)
 										}
 										// theoretically action graph can be long, to optimise it process it in one pass
 										// with an early exit when all the required imports are found
 										for _, action := range actions {
 											if action.Mode != "build" {
 												continue
 											}
 											if ok := newIndirectImports[action.Package]; !ok {
 												continue
 											}
 											packagefiles = append(packagefiles, [2]string{action.Package, filepath.Join(action.Objdir, "_pkg_.a")}) // file name hardcoded in compiler
 											delete(newIndirectImports, action.Package)
 											if len(newIndirectImports) == 0 {
 												break
 											}
 										}
 										if len(newIndirectImports) > 0 {
 											return "", fmt.Errorf("cannot resolve required packages from action graph file: %v", requiredPkgs)
 										}
 									}
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+									for _, pair := range packagefiles {
 										impPath, pkgfile := pair[0], pair[1]
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg, err := listPackage(tf.curPkg, impPath)
-												concentrate and simplify "to obfuscate" logic

Back in the day, we used to call toObfuscate anytime we needed to know
whether a package should be obfuscated.
More recently, we started computing via the ToObfuscate field,
which then gets shared with all sub-processes via sharedCache.

We still had two places that directly called toObfuscate.
Replace those with ToObfuscate, and inline toObfuscate into shared.go.

obfuscatedImportPath is also a potential footgun for main packages.
Some use cases always want the original "main" package name,
such as for use in the compiler's "-p main" flag,
while other cases want the obfuscated package import path,
such as the entries in importcfg files.

Since each of these call sites handles the edge case well,
obfuscatedImportPath now panics on main packages to avoid any misuse.

Finally, test that we never leak main package paths via ldflags.txt.
We never did, but it's good to make sure.

Overall, this avoids confusion and trims the size of main.go a bit.

											
										
										
											2 years ago
+										if err != nil {
-												skip unnecessary "refusing to list" test errors

The added test case reproduces the failure if we uncomment the added
"continue" line in processImportCfg:

	# test/bar/exporttest [test/bar/exporttest.test]
	panic: refusing to list non-dependency package: test/bar/exporttest

	goroutine 1 [running]:
	mvdan.cc/garble.processImportCfg({0xc000166780?, 0xc0001f4a70?, 0x2?})
		/home/mvdan/src/garble/main.go:983 +0x58b
	mvdan.cc/garble.transformCompile({0xc000124020?, 0x11?, 0x12?})
		/home/mvdan/src/garble/main.go:736 +0x338

It seems like a quirk of cmd/go that it includes a redundant packagefile
line in this particular edge case, but it's generally harmless for "go
build". For "garble build" it's also harmless in principle, but in
practice we had sanity checks that got upset by the unexpected line.

For now, notice the edge case and ignore it.

Fixes #522.

											
										
										
											2 years ago
+											// TODO: it's unclear why an importcfg can include an import path
 											// that's not a dependency in an edge case with "go test ./...".
 											// See exporttest/*.go in testdata/scripts/test.txt.
 											// For now, spot the pattern and avoid the unnecessary error;
 											// the dependency is unused, so the packagefile line is redundant.
-												drop support for Go 1.20

Go 1.21.0 was released in August 2023, so our upcoming release
will no longer support the Go 1.20 release series.

The first Go 1.22 release candidate is also due in December 2023,
less than a month from now, so dropping 1.20 will simplify 1.22 work.

											
										
										
											7 months ago
+											// This still triggers as of go1.21.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+											if strings.HasSuffix(tf.curPkg.ImportPath, ".test]") && strings.HasPrefix(tf.curPkg.ImportPath, impPath) {
-												skip unnecessary "refusing to list" test errors

The added test case reproduces the failure if we uncomment the added
"continue" line in processImportCfg:

	# test/bar/exporttest [test/bar/exporttest.test]
	panic: refusing to list non-dependency package: test/bar/exporttest

	goroutine 1 [running]:
	mvdan.cc/garble.processImportCfg({0xc000166780?, 0xc0001f4a70?, 0x2?})
		/home/mvdan/src/garble/main.go:983 +0x58b
	mvdan.cc/garble.transformCompile({0xc000124020?, 0x11?, 0x12?})
		/home/mvdan/src/garble/main.go:736 +0x338

It seems like a quirk of cmd/go that it includes a redundant packagefile
line in this particular edge case, but it's generally harmless for "go
build". For "garble build" it's also harmless in principle, but in
practice we had sanity checks that got upset by the unexpected line.

For now, notice the edge case and ignore it.

Fixes #522.

											
										
										
											2 years ago
+												continue
 											}
-												don't panic when we can error as easily

Panicking in small helpers or in funcs that don't return error
has proved useful to keep code easier to maintain,
particularly for cases that should typically never happen.

However, in these cases we can error just as easily.
In particular, I was getting a panic whenever I forgot
that I was running garble with Go master (1.23), which is over the top.

											
										
										
											4 months ago
+											return "", err
-												concentrate and simplify "to obfuscate" logic

Back in the day, we used to call toObfuscate anytime we needed to know
whether a package should be obfuscated.
More recently, we started computing via the ToObfuscate field,
which then gets shared with all sub-processes via sharedCache.

We still had two places that directly called toObfuscate.
Replace those with ToObfuscate, and inline toObfuscate into shared.go.

obfuscatedImportPath is also a potential footgun for main packages.
Some use cases always want the original "main" package name,
such as for use in the compiler's "-p main" flag,
while other cases want the obfuscated package import path,
such as the entries in importcfg files.

Since each of these call sites handles the edge case well,
obfuscatedImportPath now panics on main packages to avoid any misuse.

Finally, test that we never leak main package paths via ldflags.txt.
We never did, but it's good to make sure.

Overall, this avoids confusion and trims the size of main.go a bit.

											
										
										
											2 years ago
+										}
 										if lpkg.Name != "main" {
-												fix link errors when importing crypto/ecdsa (#262)

First, we had some link errors such as:

	cannot find package J6OzO8GN (using -importcfg)

This was caused by the code that writes an updated importcfg, which did
not handle import maps well. That code is now fixed, and we also add an
obfuscatedImportPath method for clarity.

Once fixed, we ran into other link errors:

	Pw3g97ww.addVW: relocation target Pw3g97ww.addVWlarge not defined

After some digging, the cause of those is assembly code that we do not
yet support obfuscating. #261 tracks that.

Meanwhile, to fix "GOPRIVATE=* garble build" and to be able to have a
test for the original import path bug, we add the packages which use
that form of assembly code to runtimeRelated - math/big and
crypto/sha512. There might be more, but these were the ones found by
trying to link crypto/tls, a fairly common dependency.

Fixes #256.
											
										
										
											3 years ago
+											impPath = lpkg.obfuscatedImportPath()
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										}
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+										fmt.Fprintf(newCfg, "packagefile %s=%s\n", impPath, pkgfile)
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									}
-												fix and re-enable "garble test" (#268)

With the many refactors building up to v0.1.0, we broke "garble test" as
we no longer dealt with test packages well.

Luckily, now that we can depend on TOOLEXEC_IMPORTPATH, we can support
the test command again, as we can always figure out what package we're
currently compiling, without having to track a "main" package.

Note that one major pitfall there is test packages, where
TOOLEXEC_IMPORTPATH does not agree with ImportPath from "go list -json".
However, we can still work around that with a bit of glue code, which is
also copiously documented.

The second change necessary is to consider test packages private
depending on whether their non-test package is private or not. This can
be done via the ForTest field in "go list -json".

The third change is to obfuscate "_testmain.go" files, which are the
code-generated main functions which actually run tests. We used to not
need to obfuscate them, since test function names are never obfuscated
and we used to not obfuscate import paths at compilation time. Now we do
rewrite import paths, so we must do that for "_testmain.go" too.

The fourth change is to re-enable test.txt, and expand it with more
sanity checks and edge cases.

Finally, document "garble test" again.

Fixes #241.
											
										
										
											3 years ago
 									// Uncomment to debug the transformed importcfg. Do not delete.
 									// newCfg.Seek(0, 0)
 									// io.Copy(os.Stderr, newCfg)
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									if err := newCfg.Close(); err != nil {
 										return "", err
 									}
 									return newCfg.Name(), nil
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+								}
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
+								type (
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+									funcFullName = string // as per go/types.Func.FullName
 									objectString = string // as per recordedObjectString
 									typeName struct {
-												avoid panic when embedding a builtin alias

TypeName.Pkg is documented as:

    Pkg returns the package to which the object belongs.
    The result is nil for labels and objects in the Universe scope.

When a struct type embeds a builtin alias type, such as byte,
this would lead to a panic since we assumed we could use the Pkg method.

Fixes #798.

											
										
										
											7 months ago
+										PkgPath string // empty if builtin
 										Name    string
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+									}
-												always require one argument for "reverse"

The "reverse" command had many levels of optional arguments:

	garble [garble flags] reverse [build flags] [package] [files]

This was pretty confusing,
and could easily lead to people running the command incorrectly:

	# note that output.txt isn't a Go package!
	garble reverse output.txt

Moreover, it made the handling of Go build flags pretty confusing.
Should the command below work?

	garble reverse -tags=mytag

It also made it easy to not notice that one must supply the main package
to properly reverse some text that it produced, like a panic message.
With the package path being implicit,
one could mistakenly provide the wrong package by running garble
in a directory containing a different package.

See #394.

											
										
										
											3 years ago
+								)
-												detect more std API calls which use reflection

Before, we would just notice direct calls to reflect's TypeOf and
ValueOf. Any other uses of reflection, such as encoding/json or
google.golang.org/protobuf, would require hints as documented by the
README.

Issue #162 outlines some ways we could fix this issue in a general way,
automatically detecting what functions use reflection on their parameters,
even for third party API funcs.

However, that goal is pretty significant in terms of code and effort.
As a temporary improvement, we can expand the list of "known" reflection
APIs via a static table.

Since this table is keyed by "func full name" strings, we could
potentially include third party APIs, such as:

	google.golang.org/protobuf/proto.Marshal

However, for now simply include all the std APIs we know about.
If we fail to do the proper fix for automatic detection in the future,
we can then fall back to expanding this global table for third parties.

Update the README's docs, to clarify that the hint is not always
necessary anymore.

Also update the reflect.txt test to stop using the hint for encoding/json,
and to also start testing text/template with a method call.
While at it, I noticed that we weren't testing the println outputs,
as they'd go to stderr - fix that too.

Updates #162.

											
										
										
											3 years ago
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+								// pkgCache contains information about a package that will be stored in fsCache.
 								// Note that pkgCache is "deep", containing information about all packages
 								// which are transitive dependencies as well.
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+								type pkgCache struct {
 									// ReflectAPIs is a static record of what std APIs use reflection on their
-												stop relying on nested "go list -toolexec" calls (#422)

We rely on importcfg files to load type info for obfuscated packages.
We use this type information to remember what names we didn't obfuscate.
Unfortunately, indirect dependencies aren't listed in importcfg files,
so we relied on extra "go list -toolexec" calls to locate object files.

This worked fine, but added a significant amount of complexity.
The extra "go list -export -toolexec=garble" invocations weren't slow,
as they avoided rebuilding or re-obfuscating thanks to the build cache.
Still, it was hard to reason about how garble runs during a build
if we might have multiple layers of -toolexec invocations.

Instead, record the export files we encounter in an incremental map,
and persist it in the build cache via the gob file we're already using.
This way, each garble invocation knows where all object files are,
even those for indirect imports.

One wrinkle is that importcfg files can point to temporary object files.
In that case, figure out its final location in the build cache.
This requires hard-coding a bit of knowledge about how GOCACHE works,
but it seems relatively harmless given how it's very little code.
Plus, if GOCACHE ever changes, it will be obvious when our code breaks.

Finally, add a TODO about potentially saving even more work.
											
										
										
											3 years ago
+									// parameters, so we can avoid obfuscating types used with them.
 									//
 									// TODO: we're not including fmt.Printf, as it would have many false positives,
 									// unless we were smart enough to detect which arguments get used as %#v or %T.
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+									ReflectAPIs map[funcFullName]map[int]bool
-												stop loading obfuscated type information from deps

If package P1 imports package P2, P1 needs to know which names from P2
weren't obfuscated. For instance, if P2 declares T2 and does
"reflect.TypeOf(T2{...})", then P2 won't obfuscate the name T2, and
neither should P1.

This information should flow from P2 to P1, as P2 builds before
P1. We do this via obfuscatedTypesPackage; P1 loads the type information
of the obfuscated version of P2, and does a lookup for T2. If T2 exists,
then it wasn't obfuscated.

This mechanism has served us well, but it has downsides:

1) It wastes CPU; we load the type information for the entire package.

2) It's complex; for instance, we need KnownObjectFiles as an extra.

3) It makes our code harder to understand, as we load both the original
   and obfuscated type informaiton.

Instead, we now have each package record what names were not obfuscated
as part of its cachedOuput file. Much like KnownObjectFiles, the map
records incrementally through the import graph, to avoid having to load
cachedOutput files for indirect dependencies.

We shouldn't need to worry about those maps getting large;
we only skip obfuscating declared names in a few uncommon scenarios,
such as the use of reflection or cgo's "//export".

Since go/types is relatively allocation-heavy, and the export files
contain a lot of data, we get a nice speed-up:

	name      old time/op         new time/op         delta
	Build-16          11.5s ± 2%          11.1s ± 3%  -3.77%  (p=0.008 n=5+5)

	name      old bin-B           new bin-B           delta
	Build-16          5.15M ± 0%          5.15M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          375ms ± 3%          341ms ± 6%  -8.96%  (p=0.008 n=5+5)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          283ms ±17%          289ms ±13%    ~     (p=0.841 n=5+5)

	name      old user-time/op    new user-time/op    delta
	Build-16          687ms ± 6%          664ms ± 7%    ~     (p=0.548 n=5+5)

Fixes #456.
Updates #475.

											
										
										
											2 years ago
-												apply TODO to rename "cannot obfuscate" APIs

They have been exclusively about reflect for over a year now.
Make that clearer, and update the docs as well.

											
										
										
											1 year ago
+									// ReflectObjects is filled with the fully qualified names from each
 									// package that we cannot obfuscate due to reflection.
 									// The included objects are named types and their fields,
 									// since it is those names being obfuscated that could break the use of reflect.
 									//
-												stop loading obfuscated type information from deps

If package P1 imports package P2, P1 needs to know which names from P2
weren't obfuscated. For instance, if P2 declares T2 and does
"reflect.TypeOf(T2{...})", then P2 won't obfuscate the name T2, and
neither should P1.

This information should flow from P2 to P1, as P2 builds before
P1. We do this via obfuscatedTypesPackage; P1 loads the type information
of the obfuscated version of P2, and does a lookup for T2. If T2 exists,
then it wasn't obfuscated.

This mechanism has served us well, but it has downsides:

1) It wastes CPU; we load the type information for the entire package.

2) It's complex; for instance, we need KnownObjectFiles as an extra.

3) It makes our code harder to understand, as we load both the original
   and obfuscated type informaiton.

Instead, we now have each package record what names were not obfuscated
as part of its cachedOuput file. Much like KnownObjectFiles, the map
records incrementally through the import graph, to avoid having to load
cachedOutput files for indirect dependencies.

We shouldn't need to worry about those maps getting large;
we only skip obfuscating declared names in a few uncommon scenarios,
such as the use of reflection or cgo's "//export".

Since go/types is relatively allocation-heavy, and the export files
contain a lot of data, we get a nice speed-up:

	name      old time/op         new time/op         delta
	Build-16          11.5s ± 2%          11.1s ± 3%  -3.77%  (p=0.008 n=5+5)

	name      old bin-B           new bin-B           delta
	Build-16          5.15M ± 0%          5.15M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          375ms ± 3%          341ms ± 6%  -8.96%  (p=0.008 n=5+5)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          283ms ±17%          289ms ±13%    ~     (p=0.841 n=5+5)

	name      old user-time/op    new user-time/op    delta
	Build-16          687ms ± 6%          664ms ± 7%    ~     (p=0.548 n=5+5)

Fixes #456.
Updates #475.

											
										
										
											2 years ago
+									// This record is necessary for knowing what names from imported packages
 									// weren't obfuscated, so we can obfuscate their local uses accordingly.
-												apply TODO to rename "cannot obfuscate" APIs

They have been exclusively about reflect for over a year now.
Make that clearer, and update the docs as well.

											
										
										
											1 year ago
+									ReflectObjects map[objectString]struct{}
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+									// EmbeddedAliasFields records which embedded fields use a type alias.
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+									// They are the only instance where a type alias matters for obfuscation,
 									// because the embedded field name is derived from the type alias itself,
 									// and not the type that the alias points to.
 									// In that way, the type alias is obfuscated as a form of named type,
 									// bearing in mind that it may be owned by a different package.
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+									EmbeddedAliasFields map[objectString]typeName
 								}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+								func (c *pkgCache) CopyFrom(c2 pkgCache) {
 									maps.Copy(c.ReflectAPIs, c2.ReflectAPIs)
 									maps.Copy(c.ReflectObjects, c2.ReflectObjects)
 									maps.Copy(c.EmbeddedAliasFields, c2.EmbeddedAliasFields)
 								}
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+								func ssaBuildPkg(pkg *types.Package, files []*ast.File, info *types.Info) *ssa.Package {
 									// Create SSA packages for all imports. Order is not significant.
 									ssaProg := ssa.NewProgram(fset, 0)
 									created := make(map[*types.Package]bool)
 									var createAll func(pkgs []*types.Package)
 									createAll = func(pkgs []*types.Package) {
 										for _, p := range pkgs {
 											if !created[p] {
 												created[p] = true
 												ssaProg.CreatePackage(p, nil, nil, true)
 												createAll(p.Imports())
 											}
 										}
 									}
 									createAll(pkg.Imports())
 									ssaPkg := ssaProg.CreatePackage(pkg, files, info, false)
 									ssaPkg.Build()
 									return ssaPkg
 								}
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+								func openCache() (*cache.Cache, error) {
 									// Use a subdirectory for the hashed build cache, to clarify what it is,
 									// and to allow us to have other directories or files later on without mixing.
-												internal/linker: place files under GARBLE_CACHE

This means we now have a unified cache directory for garble,
which is now documented in the README.

I considered using the same hash-based cache used for pkgCache,
but decided against it since that cache implementation only stores
regular files without any executable bits set.
We could force the cache package to do what we want here,
but I'm leaning against it for now given that single files work OK.

											
										
										
											1 year ago
+									dir := filepath.Join(sharedCache.CacheDir, "build")
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									if err := os.MkdirAll(dir, 0o777); err != nil {
 										return nil, err
 									}
 									return cache.Open(dir)
-												avoid a filesystem race with build cache entries

After the last commit, I started seeing seemingly random test failures:

    --- FAIL: TestScripts/cgo (1.17s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # runtime/internal/math
            EOF
            # internal/bytealg
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/cgo.txt:3: unexpected command failure

    --- FAIL: TestScripts/reflect (8.63s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/reflect.txt:3: unexpected command failure

    --- FAIL: TestScripts/linkname (8.72s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/linkname.txt:3: unexpected command failure

After some investigation,
it turned out that concurrent "garble build" processes
were writing to the same build cache paths at the same time.
This is effectively a filesystem race,
and encoding/gob could error when reading partly written files.

To fix this problem,
use a cache path that changes according to the obfuscated build.
See garbleExportFile.

Note that the data we store in that file does not vary with obfuscation.
We could fix the filesystem race by adding locking around the old path.
However, we'll want to cache data that does vary with garble flags,
such as the -debugdir source code.

											
										
										
											3 years ago
+								}
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+								func loadPkgCache(lpkg *listedPackage, pkg *types.Package, files []*ast.File, info *types.Info, ssaPkg *ssa.Package) (pkgCache, error) {
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									fsCache, err := openCache()
 									if err != nil {
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+										return pkgCache{}, err
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									filename, _, err := fsCache.GetFile(lpkg.GarbleActionID)
-												merge the two "known cannot obfuscate" maps

Per the TODOs that I left myself in the last commit.
As expected, this change allows tidying up the code a bit,
makes our use of caching a bit more consistent,
and also allows us to load the current package from the cache.

											
										
										
											1 year ago
+									// Already in the cache; load it directly.
 									if err == nil {
 										f, err := os.Open(filename)
 										if err != nil {
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+											return pkgCache{}, err
-												merge the two "known cannot obfuscate" maps

Per the TODOs that I left myself in the last commit.
As expected, this change allows tidying up the code a bit,
makes our use of caching a bit more consistent,
and also allows us to load the current package from the cache.

											
										
										
											1 year ago
+										}
 										defer f.Close()
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+										var loaded pkgCache
 										if err := gob.NewDecoder(f).Decode(&loaded); err != nil {
 											return pkgCache{}, fmt.Errorf("gob decode: %w", err)
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+										}
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+										return loaded, nil
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									}
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									return computePkgCache(fsCache, lpkg, pkg, files, info, ssaPkg)
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+								}
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+								func computePkgCache(fsCache *cache.Cache, lpkg *listedPackage, pkg *types.Package, files []*ast.File, info *types.Info, ssaPkg *ssa.Package) (pkgCache, error) {
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									// Not yet in the cache. Load the cache entries for all direct dependencies,
 									// build our cache entry, and write it to disk.
 									// Note that practically all errors from Cache.GetFile are a cache miss;
 									// for example, a file might exist but be empty if another process
 									// is filling the same cache entry concurrently.
 									//
 									// TODO: if A (curPkg) imports B and C, and B also imports C,
 									// then loading the gob files from both B and C is unnecessary;
 									// loading B's gob file would be enough. Is there an easy way to do that?
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+									computed := pkgCache{
 										ReflectAPIs: map[funcFullName]map[int]bool{
 											"reflect.TypeOf":  {0: true},
 											"reflect.ValueOf": {0: true},
 										},
 										ReflectObjects:      map[objectString]struct{}{},
 										EmbeddedAliasFields: map[objectString]typeName{},
 									}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+									for _, imp := range lpkg.Imports {
 										if imp == "C" {
 											// `go list -json` shows "C" in Imports but not Deps.
 											// See https://go.dev/issue/60453.
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+											continue
 										}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+										// Shadowing lpkg ensures we don't use the wrong listedPackage below.
 										lpkg, err := listPackage(lpkg, imp)
-												More robust reflection detection

Functions which use reflection on one of their parameters are,
now added to knownReflectAPIs automatically.

This makes most explicit hints for reflection redundant.
Simple protobuf code now works correctly when obfuscated.

Fixes #162
Fixes #373

											
										
										
											3 years ago
+										if err != nil {
-												don't panic when we can error as easily

Panicking in small helpers or in funcs that don't return error
has proved useful to keep code easier to maintain,
particularly for cases that should typically never happen.

However, in these cases we can error just as easily.
In particular, I was getting a panic whenever I forgot
that I was running garble with Go master (1.23), which is over the top.

											
										
										
											4 months ago
+											return computed, err
-												More robust reflection detection

Functions which use reflection on one of their parameters are,
now added to knownReflectAPIs automatically.

This makes most explicit hints for reflection redundant.
Simple protobuf code now works correctly when obfuscated.

Fixes #162
Fixes #373

											
										
										
											3 years ago
+										}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+										if lpkg.BuildID == "" {
-												avoid a filesystem race with build cache entries

After the last commit, I started seeing seemingly random test failures:

    --- FAIL: TestScripts/cgo (1.17s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # runtime/internal/math
            EOF
            # internal/bytealg
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/cgo.txt:3: unexpected command failure

    --- FAIL: TestScripts/reflect (8.63s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/reflect.txt:3: unexpected command failure

    --- FAIL: TestScripts/linkname (8.72s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/linkname.txt:3: unexpected command failure

After some investigation,
it turned out that concurrent "garble build" processes
were writing to the same build cache paths at the same time.
This is effectively a filesystem race,
and encoding/gob could error when reading partly written files.

To fix this problem,
use a cache path that changes according to the obfuscated build.
See garbleExportFile.

Note that the data we store in that file does not vary with obfuscation.
We could fix the filesystem race by adding locking around the old path.
However, we'll want to cache data that does vary with garble flags,
such as the -debugdir source code.

											
										
										
											3 years ago
+											continue // nothing to load
 										}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+										if err := func() error { // function literal for the deferred close
 											if filename, _, err := fsCache.GetFile(lpkg.GarbleActionID); err == nil {
 												// Cache hit; append new entries to computed.
 												f, err := os.Open(filename)
 												if err != nil {
 													return err
 												}
 												defer f.Close()
 												if err := gob.NewDecoder(f).Decode(&computed); err != nil {
 													return fmt.Errorf("gob decode: %w", err)
 												}
 												return nil
 											}
 											// Missing or corrupted entry in the cache for a dependency.
 											// Could happen if GARBLE_CACHE was emptied but GOCACHE was not.
 											// Compute it, which can recurse if many entries are missing.
 											files, err := parseFiles(lpkg.Dir, lpkg.CompiledGoFiles)
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+											if err != nil {
 												return err
 											}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+											origImporter := importerForPkg(lpkg)
 											pkg, info, err := typecheck(lpkg.ImportPath, files, origImporter)
-												More robust reflection detection

Functions which use reflection on one of their parameters are,
now added to knownReflectAPIs automatically.

This makes most explicit hints for reflection redundant.
Simple protobuf code now works correctly when obfuscated.

Fixes #162
Fixes #373

											
										
										
											3 years ago
+											if err != nil {
 												return err
 											}
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+											computedImp, err := computePkgCache(fsCache, lpkg, pkg, files, info, nil)
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+											if err != nil {
 												return err
-												avoid a filesystem race with build cache entries

After the last commit, I started seeing seemingly random test failures:

    --- FAIL: TestScripts/cgo (1.17s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # runtime/internal/math
            EOF
            # internal/bytealg
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/cgo.txt:3: unexpected command failure

    --- FAIL: TestScripts/reflect (8.63s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/reflect.txt:3: unexpected command failure

    --- FAIL: TestScripts/linkname (8.72s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/linkname.txt:3: unexpected command failure

After some investigation,
it turned out that concurrent "garble build" processes
were writing to the same build cache paths at the same time.
This is effectively a filesystem race,
and encoding/gob could error when reading partly written files.

To fix this problem,
use a cache path that changes according to the obfuscated build.
See garbleExportFile.

Note that the data we store in that file does not vary with obfuscation.
We could fix the filesystem race by adding locking around the old path.
However, we'll want to cache data that does vary with garble flags,
such as the -debugdir source code.

											
										
										
											3 years ago
+											}
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+											computed.CopyFrom(computedImp)
-												avoid a filesystem race with build cache entries

After the last commit, I started seeing seemingly random test failures:

    --- FAIL: TestScripts/cgo (1.17s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # runtime/internal/math
            EOF
            # internal/bytealg
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/cgo.txt:3: unexpected command failure

    --- FAIL: TestScripts/reflect (8.63s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/reflect.txt:3: unexpected command failure

    --- FAIL: TestScripts/linkname (8.72s)
        testscript.go:397:
            > env GOPRIVATE=test/main
            > garble build
            [stderr]
            # math
            EOF
            exit status 2
            [exit status 1]
            FAIL: testdata/scripts/linkname.txt:3: unexpected command failure

After some investigation,
it turned out that concurrent "garble build" processes
were writing to the same build cache paths at the same time.
This is effectively a filesystem race,
and encoding/gob could error when reading partly written files.

To fix this problem,
use a cache path that changes according to the obfuscated build.
See garbleExportFile.

Note that the data we store in that file does not vary with obfuscation.
We could fix the filesystem race by adding locking around the old path.
However, we'll want to cache data that does vary with garble flags,
such as the -debugdir source code.

											
										
										
											3 years ago
+											return nil
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
+										}(); err != nil {
-												support computing missing pkgCache entries

Some users had been running into "cannot load cache entry" errors,
which could happen if garble's cache files in GOCACHE were removed
when Go's own cache files were not.

Now that we've moved to our own separate cache directory,
and that we've refactored the codebase to depend less on globals
and no longer assume that we're loading info for the current package,
we can now compute a pkgCache entry for a dependency if needed.

We add a pkgCache.CopyFrom method to be able to append map entries
from one pkgCache to another without needing an encoding/gob roundtrip.

We also add a parseFiles helper, since we now have three bits of code
which need to parse a list of Go files from disk.

Fixes #708.

											
										
										
											1 year ago
+											return pkgCache{}, fmt.Errorf("pkgCache load for %s: %w", imp, err)
-												More robust reflection detection

Functions which use reflection on one of their parameters are,
now added to knownReflectAPIs automatically.

This makes most explicit hints for reflection redundant.
Simple protobuf code now works correctly when obfuscated.

Fixes #162
Fixes #373

											
										
										
											3 years ago
+										}
 									}
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									// Fill EmbeddedAliasFields from the type info.
 									for name, obj := range info.Uses {
 										obj, ok := obj.(*types.TypeName)
 										if !ok || !obj.IsAlias() {
 											continue
 										}
 										vr, _ := info.Defs[name].(*types.Var)
 										if vr == nil || !vr.Embedded() {
 											continue
 										}
 										vrStr := recordedObjectString(vr)
 										if vrStr == "" {
 											continue
 										}
 										aliasTypeName := typeName{
-												avoid panic when embedding a builtin alias

TypeName.Pkg is documented as:

    Pkg returns the package to which the object belongs.
    The result is nil for labels and objects in the Universe scope.

When a struct type embeds a builtin alias type, such as byte,
this would lead to a panic since we assumed we could use the Pkg method.

Fixes #798.

											
										
										
											7 months ago
+											Name: obj.Name(),
 										}
 										if pkg := obj.Pkg(); pkg != nil {
 											aliasTypeName.PkgPath = pkg.Path()
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+										}
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+										computed.EmbeddedAliasFields[vrStr] = aliasTypeName
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									}
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									// Fill the reflect info from SSA, which builds on top of the syntax tree and type info.
-												isolate reflect.go from updating globals directly

That is, stop reusing "transformer" as the receiver on methods,
and stop writing the results to the global curPkgCache struct.

Soon we will need to support computing pkgCache for any dependency,
not just the current package, to make the caching properly robust.
This allows us to fill reflectInspector with different values.

The explicit isolation also helps prevent bugs.
For instance, we were calling recursivelyRecordAsNotObfuscated from
transformCompile, which happens after we have loaded or saved pkgCache.
Meaning, the current package sees a larger pkgCache than its dependents.
In this particular case it wasn't causing any bugs,
since the two reflect types in question only had unexported fields,
but it's still good to treat pkgCache as read-only in transformCompile.

											
										
										
											1 year ago
+									inspector := reflectInspector{
-												track converted types when recording reflection usage

Fixes #763.
Fixes #782.
Fixes #785.
Fixes #807.
											
										
										
											7 months ago
+										pkg:             pkg,
 										checkedAPIs:     make(map[string]bool),
 										propagatedInstr: map[ssa.Instruction]bool{},
 										result:          computed, // append the results
-												isolate reflect.go from updating globals directly

That is, stop reusing "transformer" as the receiver on methods,
and stop writing the results to the global curPkgCache struct.

Soon we will need to support computing pkgCache for any dependency,
not just the current package, to make the caching properly robust.
This allows us to fill reflectInspector with different values.

The explicit isolation also helps prevent bugs.
For instance, we were calling recursivelyRecordAsNotObfuscated from
transformCompile, which happens after we have loaded or saved pkgCache.
Meaning, the current package sees a larger pkgCache than its dependents.
In this particular case it wasn't causing any bugs,
since the two reflect types in question only had unexported fields,
but it's still good to treat pkgCache as read-only in transformCompile.

											
										
										
											1 year ago
+									}
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									if ssaPkg == nil {
 										ssaPkg = ssaBuildPkg(pkg, files, info)
 									}
-												isolate reflect.go from updating globals directly

That is, stop reusing "transformer" as the receiver on methods,
and stop writing the results to the global curPkgCache struct.

Soon we will need to support computing pkgCache for any dependency,
not just the current package, to make the caching properly robust.
This allows us to fill reflectInspector with different values.

The explicit isolation also helps prevent bugs.
For instance, we were calling recursivelyRecordAsNotObfuscated from
transformCompile, which happens after we have loaded or saved pkgCache.
Meaning, the current package sees a larger pkgCache than its dependents.
In this particular case it wasn't causing any bugs,
since the two reflect types in question only had unexported fields,
but it's still good to treat pkgCache as read-only in transformCompile.

											
										
										
											1 year ago
+									inspector.recordReflection(ssaPkg)
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
 									// Unlikely that we could stream the gob encode, as cache.Put wants an io.ReadSeeker.
 									var buf bytes.Buffer
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+									if err := gob.NewEncoder(&buf).Encode(computed); err != nil {
 										return pkgCache{}, err
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									if err := fsCache.PutBytes(lpkg.GarbleActionID, buf.Bytes()); err != nil {
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+										return pkgCache{}, err
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+									}
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+									return computed, nil
-												More robust reflection detection

Functions which use reflection on one of their parameters are,
now added to knownReflectAPIs automatically.

This makes most explicit hints for reflection redundant.
Simple protobuf code now works correctly when obfuscated.

Fixes #162
Fixes #373

											
										
										
											3 years ago
+								}
-												remove duplicate go:generate directive

I hadn't noticed that cmd/bundle prints its own go:generate directive.
I guess that makes sense for the average user running it directly,
but that doesn't apply to us, and we end up with duplicate directives.

Before:

	$ go generate -n
	bundle -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted
	go run golang.org/x/tools/cmd/bundle@v0.1.9 -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted

After:

	$ go generate -n
	go run golang.org/x/tools/cmd/bundle@v0.1.9 -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted
	sed -i /go:generate/d cmdgo_quoted.go

While here, I made a typo in the last release notes, because of course.
I already edited that out in the GitHub release.

											
										
										
											2 years ago
+								// cmd/bundle will include a go:generate directive in its output by default.
 								// Ours specifies a version and doesn't assume bundle is in $PATH, so drop it.
-												use x/tools version from go.mod in go:generate

Otherwise we have to update that `@semver` string
alongside the regular x/tools updates in go.mod.
There's no reason to separate the two versions either.

											
										
										
											7 months ago
+								//go:generate go run golang.org/x/tools/cmd/bundle -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted
-												remove duplicate go:generate directive

I hadn't noticed that cmd/bundle prints its own go:generate directive.
I guess that makes sense for the average user running it directly,
but that doesn't apply to us, and we end up with duplicate directives.

Before:

	$ go generate -n
	bundle -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted
	go run golang.org/x/tools/cmd/bundle@v0.1.9 -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted

After:

	$ go generate -n
	go run golang.org/x/tools/cmd/bundle@v0.1.9 -o cmdgo_quoted.go -prefix cmdgoQuoted cmd/internal/quoted
	sed -i /go:generate/d cmdgo_quoted.go

While here, I made a typo in the last release notes, because of course.
I already edited that out in the GitHub release.

											
										
										
											2 years ago
+								//go:generate sed -i /go:generate/d cmdgo_quoted.go
-												add support for -ldflags using quotes

In particular, using -ldflags with -

In particular, a command like:

	garble -literals build -ldflags='-X "main.foo=foo bar"'

would fail, because we would try to use "\"main" as the package name for
the -X qualified name, with the leading quote character.

This is because we used strings.Split(ldflags, " ").
Instead, use the same quoted.Split that cmd/go uses,
copied over thanks to x/tools/cmd/bundle and go:generate.

Updates #492.

											
										
										
											2 years ago
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+								// computeLinkerVariableStrings iterates over the -ldflags arguments,
 								// filling a map with all the string values set via the linker's -X flag.
 								// TODO: can we put this in sharedCache, using objectString as a key?
-												various minor TODO cleanups

computeLinkerVariableStrinsg had an unusedargument.

Only skip obfuscating the name "FS" in the "embed" package.

The reflect methods no longer use the transformer receiver type,
so that TODO now feels unnecessary. Many methods need to be aware
of what the current types.Package is, and that seems reasonable.

We no longer use writeFileExclusive for our own cache on disk,
so the TODO about using locking or atomic writes is no longer relevant.

											
										
										
											1 year ago
+								func computeLinkerVariableStrings(pkg *types.Package) (map[*types.Var]string, error) {
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+									linkerVariableStrings := make(map[*types.Var]string)
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
-												add a TODO about a possible cache bug with -literals and -ldflags

I spent some time trying to reproduce the bug but couldn't,
so for now, make a detailed note for it.
We can come back to it if we actually run into it in the future.

Fixes #492.

											
										
										
											2 years ago
+									// TODO: this is a linker flag that affects how we obfuscate a package at
 									// compile time. Note that, if the user changes ldflags, then Go may only
 									// re-link the final binary, without re-compiling any packages at all.
 									// It's possible that this could result in:
 									//
 									//    garble -literals build -ldflags=-X=pkg.name=before # name="before"
 									//    garble -literals build -ldflags=-X=pkg.name=after  # name="before" as cached
 									//
 									// We haven't been able to reproduce this problem for now,
 									// but it's worth noting it and keeping an eye out for it in the future.
 									// If we do confirm this theoretical bug,
 									// the solution will be to either find a different solution for -literals,
 									// or to force including -ldflags into the build cache key.
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									ldflags, err := cmdgoQuotedSplit(flagValue(sharedCache.ForwardBuildFlags, "-ldflags"))
-												add support for -ldflags using quotes

In particular, using -ldflags with -

In particular, a command like:

	garble -literals build -ldflags='-X "main.foo=foo bar"'

would fail, because we would try to use "\"main" as the package name for
the -X qualified name, with the leading quote character.

This is because we used strings.Split(ldflags, " ").
Instead, use the same quoted.Split that cmd/go uses,
copied over thanks to x/tools/cmd/bundle and go:generate.

Updates #492.

											
										
										
											2 years ago
+									if err != nil {
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+										return nil, err
-												add support for -ldflags using quotes

In particular, using -ldflags with -

In particular, a command like:

	garble -literals build -ldflags='-X "main.foo=foo bar"'

would fail, because we would try to use "\"main" as the package name for
the -X qualified name, with the leading quote character.

This is because we used strings.Split(ldflags, " ").
Instead, use the same quoted.Split that cmd/go uses,
copied over thanks to x/tools/cmd/bundle and go:generate.

Updates #492.

											
										
										
											2 years ago
+									}
 									flagValueIter(ldflags, "-X", func(val string) {
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										// val is in the form of "foo.com/bar.name=value".
 										fullName, stringValue, found := strings.Cut(val, "=")
 										if !found {
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+											return // invalid
 										}
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										// fullName is "foo.com/bar.name"
 										i := strings.LastIndexByte(fullName, '.')
 										path, name := fullName[:i], fullName[i+1:]
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
 										// -X represents the main package as "main", not its import path.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										if path != pkg.Path() && (path != "main" || pkg.Name() != "main") {
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+											return // not the current package
 										}
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+										obj, _ := pkg.Scope().Lookup(name).(*types.Var)
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+										if obj == nil {
-												slight simplifications and alloc reductions

Reuse a buffer and a map across loop iterations, because we can.

Make recordTypeDone only track named types, as that is enough to detect
type cycles. Without named types, there can be no cycles.

These two reduce allocs by a fraction of a percent:

	name      old time/op         new time/op         delta
	Build-16          10.4s ± 2%          10.4s ± 1%    ~     (p=0.739 n=10+10)

	name      old bin-B           new bin-B           delta
	Build-16          5.51M ± 0%          5.51M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          391ms ± 9%          407ms ± 7%    ~     (p=0.095 n=10+9)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          34.5M ± 0%          34.4M ± 0%  -0.12%  (p=0.000 n=10+10)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.87s ± 5%          5.82s ± 5%    ~     (p=0.182 n=10+9)

It doesn't seem like much, but remember that these stats are for the
entire set of processes, where garble only accounts for about 10% of the
total wall time when compared to the compiler or linker. So a ~0.1%
decrease globally is still significant.

linkerVariableStrings is also indexed by *types.Var rather than types.Object,
since -ldflags=-X only supports setting the string value of variables.
This shouldn't make a significant difference in terms of allocs,
but at least the map is less prone to confusion with other object types.
To ensure the new code doesn't trip up on non-variables, we add test cases.

Finally, for the sake of clarity, index into the types.Info maps like
Defs and Uses rather than calling ObjectOf if we know whether the
identifier we have is a definition of a name or the use of a defined name.
This isn't better in terms of performance, as ObjectOf is a tiny method,
but just like with linkerVariableStrings before, the new code is clearer.

											
										
										
											2 years ago
+											return // no such variable; skip
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+										}
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+										linkerVariableStrings[obj] = stringValue
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+									})
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+									return linkerVariableStrings, nil
-												exclude identifiers used via reflection

If reflect.TypeOf or reflect.ValueOf are used on a type declared in the same package,
don't garble that type name or any of its fields.

Fixes #15.
											
										
										
											4 years ago
+								}
-												introduce a receiver type to transform a Go package

Means that we no longer have to pass a dozen parameters around, mainly
to transformGo. We can also start documenting what each of the fields
actually does, and group them better.

While at it, pkgPath and pkgScope can both be replaced by a
*types.Package, since they're both accessible via trivially cheap
methods.

											
										
										
											4 years ago
+								// transformer holds all the information and state necessary to obfuscate a
 								// single Go package.
 								type transformer struct {
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									// curPkg holds basic information about the package being currently compiled or linked.
 									curPkg *listedPackage
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+									// curPkgCache is the pkgCache for curPkg.
 									curPkgCache pkgCache
-												introduce a receiver type to transform a Go package

Means that we no longer have to pass a dozen parameters around, mainly
to transformGo. We can also start documenting what each of the fields
actually does, and group them better.

While at it, pkgPath and pkgScope can both be replaced by a
*types.Package, since they're both accessible via trivially cheap
methods.

											
										
										
											4 years ago
+									// The type-checking results; the package itself, and the Info struct.
 									pkg  *types.Package
 									info *types.Info
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+									// linkerVariableStrings records objects for variables used in -ldflags=-X flags,
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+									// as well as the strings the user wants to inject them with.
-												separate and rename prefillObjectMaps

The name and docs on that func were wildly out of date,
since it no longer has anything to do with reflection at all.

We only use the linkerVariableStrings map with -literals,
so we can avoid the call entirely if the flag isn't set.

											
										
										
											1 year ago
+									// Used when obfuscating literals, so that we obfuscate the injected value.
-												slight simplifications and alloc reductions

Reuse a buffer and a map across loop iterations, because we can.

Make recordTypeDone only track named types, as that is enough to detect
type cycles. Without named types, there can be no cycles.

These two reduce allocs by a fraction of a percent:

	name      old time/op         new time/op         delta
	Build-16          10.4s ± 2%          10.4s ± 1%    ~     (p=0.739 n=10+10)

	name      old bin-B           new bin-B           delta
	Build-16          5.51M ± 0%          5.51M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          391ms ± 9%          407ms ± 7%    ~     (p=0.095 n=10+9)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          34.5M ± 0%          34.4M ± 0%  -0.12%  (p=0.000 n=10+10)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.87s ± 5%          5.82s ± 5%    ~     (p=0.182 n=10+9)

It doesn't seem like much, but remember that these stats are for the
entire set of processes, where garble only accounts for about 10% of the
total wall time when compared to the compiler or linker. So a ~0.1%
decrease globally is still significant.

linkerVariableStrings is also indexed by *types.Var rather than types.Object,
since -ldflags=-X only supports setting the string value of variables.
This shouldn't make a significant difference in terms of allocs,
but at least the map is less prone to confusion with other object types.
To ensure the new code doesn't trip up on non-variables, we add test cases.

Finally, for the sake of clarity, index into the types.Info maps like
Defs and Uses rather than calling ObjectOf if we know whether the
identifier we have is a definition of a name or the use of a defined name.
This isn't better in terms of performance, as ObjectOf is a tiny method,
but just like with linkerVariableStrings before, the new code is clearer.

											
										
										
											2 years ago
+									linkerVariableStrings map[*types.Var]string
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
-												handle aliases to foreign named types properly

When such an alias name was used to define an embedded field, we handled
that case gracefully via the code using:

	tf.info.Uses[node].(*types.TypeName)

Unfortunately, when the same field name was used elsewhere, such as a
composite literal, tf.Info.Uses gave us a *types.Var, not a
*types.TypeName, meaning we could no longer tell if this was an alias,
or what it pointed to.

Thus, we failed to obfuscate the name properly in the added test case:

	> garble build
	[stderr]
	# test/main/sub
	xxWZf66u.go:36: unknown field 'foreignAlias' in struct literal of type smhWelwn

It doesn't seem like any of the go/types APIs allows us to obtain the
*types.TypeName directly in this scenario. Thus, use a trick that we
used before: after typechecking, but before obfuscating, record all
embedded struct field *types.Var which are aliases via a map, where the
value holds the *types.TypeName for the alias.

Updates #349.

											
										
										
											3 years ago
+									// fieldToStruct helps locate struct types from any of their field
 									// objects. Useful when obfuscating field names.
 									fieldToStruct map[*types.Var]*types.Struct
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
 									// obfRand is initialized by transformCompile and used during obfuscation.
 									// It is left nil at init time, so that we only use it after it has been
 									// properly initialized with a deterministic seed.
 									// It must only be used for deterministic obfuscation;
 									// if it is used for any other purpose, we may lose determinism.
 									obfRand *mathrand.Rand
 									// origImporter is a go/types importer which uses the original versions
 									// of packages, without any obfuscation. This is helpful to make
 									// decisions on how to obfuscate our input code.
 									origImporter importerWithMap
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
 									// usedAllImportsFiles is used to prevent multiple calls of tf.useAllImports function on one file
 									// in case of simultaneously applied control flow and literals obfuscation
 									usedAllImportsFiles map[*ast.File]bool
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+								}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func typecheck(pkgPath string, files []*ast.File, origImporter importerWithMap) (*types.Package, *types.Info, error) {
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									info := &types.Info{
-												replace our caching inside GOCACHE with GARBLE_CACHE

For each Go package we obfuscate, we need to store information about
how we obfuscated it, which is needed when obfuscating its dependents.
For example, if A depends on B to use the type B.Foo, A needs to know
whether or not B.Foo was obfuscated; it depends on B's use of reflect.

We record this information in a gob file, which is cached on disk.
To avoid rolling our own custom cache, and since garble is so closely
connected with cmd/go already, we piggybacked off of Go's GOCACHE.
In particular, for each build cache entry per `go list`'s Export field,
we would store a "garble" sibling file with that gob content.

However, this was brittle for two reasons:

1) We were doing this without cmd/go's permission or knowledge.
   We were careful to use filename suffixes similar to Export files,
   meaning that `go clean` and other commands would treat them the same.
   However, this could confuse cmd/go at any point in the future.

2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of
   the build and test caches under control. Right now, this means that
   every 24h, any file not accessed in the last five days is deleted.
   However, that trimming heuristic is done per-file.
   If the trimming removed Garble's sibling file but not the original
   Export file, this could cause errors such as
   "cannot load garble export file" which users already ran into.

Instead, start using github.com/rogpeppe/go-internal/cache,
an exported copy of cmd/go's own cache implementation for GOCACHE.
Since we need an entirely separate directory, we introduce GARBLE_CACHE,
defaulting to the "garble" directory inside the user's cache directory.
For example, on Linux this would be ~/.cache/garble.

Inside GARBLE_CACHE, our gob file cache will be under "build",
which helps clarify that this cache is used when obfuscating Go builds,
and allows placing other kinds of caches inside GARBLE_CACHE.
For example, we already have a need for storing linker binaries,
which for now still use their own caching mechanism.

This commit does not make our cache properly resistant to removed files.
The proof is that our seed.txtar testscript still fails the second case.
However, we do rewrite all of our caching logic away from Export files,
which in itself is a considerable refactor, and we add a few TODOs.

One notable change is how we load gob files from dependencies
when building the cache entry for the current package.
We used to load the gob files from all packages in the Deps field.
However, that is the list of all _transitive_ dependencies.
Since these gob files are already flat, meaning they contain information
about all of their transitive dependencies as well, we need only load
the gob files from the direct dependencies, the Imports field.

Performance is largely unchanged, since the behavior is similar.
However, the change from Deps to Imports saves us some work,
which can be seen in the reduced mallocs per obfuscated build.

It's unclear why the binary size isn't stable.
When reverting the Deps to Imports change, it then settles at 5.386Mi,
which is almost exactly in between the two measurements below.
I'm not sure why, but that metric appears to be slightly unstable.

    goos: linux
    goarch: amd64
    pkg: mvdan.cc/garble
    cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics
            │    old     │             new              │
            │   sec/op   │   sec/op    vs base          │
    Build-8   11.09 ± 1%   11.08 ± 1%  ~ (p=0.796 n=10)

            │     old      │                 new                 │
            │    bin-B     │    bin-B      vs base               │
    Build-8   5.390Mi ± 0%   5.382Mi ± 0%  -0.14% (p=0.000 n=10)

            │      old      │               new               │
            │ cached-sec/op │ cached-sec/op  vs base          │
    Build-8     415.5m ± 4%     421.6m ± 1%  ~ (p=0.190 n=10)

            │     old     │                new                 │
            │ mallocs/op  │ mallocs/op   vs base               │
    Build-8   35.43M ± 0%   34.05M ± 0%  -3.89% (p=0.000 n=10)

            │    old     │             new              │
            │ sys-sec/op │ sys-sec/op  vs base          │
    Build-8   5.662 ± 1%   5.701 ± 2%  ~ (p=0.280 n=10)

											
										
										
											1 year ago
+										Types:      make(map[ast.Expr]types.TypeAndValue),
 										Defs:       make(map[*ast.Ident]types.Object),
 										Uses:       make(map[*ast.Ident]types.Object),
 										Implicits:  make(map[ast.Node]types.Object),
 										Scopes:     make(map[ast.Node]*types.Scope),
 										Selections: make(map[*ast.SelectorExpr]*types.Selection),
 										Instances:  make(map[*ast.Ident]types.Instance),
 									}
-												add a couple of reminder TODOs for go/... packages

											
										
										
											7 months ago
+									// TODO(mvdan): we should probably set types.Config.GoVersion from go.mod
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+									origTypesConfig := types.Config{Importer: origImporter}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									pkg, err := origTypesConfig.Check(pkgPath, fset, files, info)
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+									if err != nil {
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+										return nil, nil, fmt.Errorf("typecheck error: %v", err)
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+									}
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									return pkg, info, err
 								}
 								func computeFieldToStruct(info *types.Info) map[*types.Var]*types.Struct {
 									done := make(map[*types.Named]bool)
 									fieldToStruct := make(map[*types.Var]*types.Struct)
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
 									// Run recordType on all types reachable via types.Info.
 									// A bit hacky, but I could not find an easier way to do this.
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									for _, obj := range info.Uses {
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+										if obj != nil {
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+											recordType(obj.Type(), nil, done, fieldToStruct)
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+										}
 									}
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									for _, obj := range info.Defs {
 										if obj != nil {
 											recordType(obj.Type(), nil, done, fieldToStruct)
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+										}
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+									}
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									for _, tv := range info.Types {
 										recordType(tv.Type, nil, done, fieldToStruct)
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+									}
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+									return fieldToStruct
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+								}
 								// recordType visits every reachable type after typechecking a package.
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+								// Right now, all it does is fill the fieldToStruct map.
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+								// Since types can be recursive, we need a map to avoid cycles.
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+								// We only need to track named types as done, as all cycles must use them.
 								func recordType(used, origin types.Type, done map[*types.Named]bool, fieldToStruct map[*types.Var]*types.Struct) {
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+									if origin == nil {
 										origin = used
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+									}
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+									type Container interface{ Elem() types.Type }
 									switch used := used.(type) {
 									case Container:
-												avoid a type assertion panic with generic code

I was wrongly assumed that, if `used` has an `Elem` method,
then `origin` must too. But it does not if it's a type parameter.

Add a test case too, which panicked before the fix.

Fixes #577.

											
										
										
											2 years ago
+										// origin may be a *types.TypeParam, which is not a Container.
 										// For now, we haven't found a need to recurse in that case.
 										// We can edit this code in the future if we find an example,
 										// because we panic if a field is not in fieldToStruct.
 										if origin, ok := origin.(Container); ok {
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+											recordType(used.Elem(), origin.Elem(), done, fieldToStruct)
-												avoid a type assertion panic with generic code

I was wrongly assumed that, if `used` has an `Elem` method,
then `origin` must too. But it does not if it's a type parameter.

Add a test case too, which panicked before the fix.

Fixes #577.

											
										
										
											2 years ago
+										}
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+									case *types.Named:
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+										if done[used] {
-												slight simplifications and alloc reductions

Reuse a buffer and a map across loop iterations, because we can.

Make recordTypeDone only track named types, as that is enough to detect
type cycles. Without named types, there can be no cycles.

These two reduce allocs by a fraction of a percent:

	name      old time/op         new time/op         delta
	Build-16          10.4s ± 2%          10.4s ± 1%    ~     (p=0.739 n=10+10)

	name      old bin-B           new bin-B           delta
	Build-16          5.51M ± 0%          5.51M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          391ms ± 9%          407ms ± 7%    ~     (p=0.095 n=10+9)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          34.5M ± 0%          34.4M ± 0%  -0.12%  (p=0.000 n=10+10)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.87s ± 5%          5.82s ± 5%    ~     (p=0.182 n=10+9)

It doesn't seem like much, but remember that these stats are for the
entire set of processes, where garble only accounts for about 10% of the
total wall time when compared to the compiler or linker. So a ~0.1%
decrease globally is still significant.

linkerVariableStrings is also indexed by *types.Var rather than types.Object,
since -ldflags=-X only supports setting the string value of variables.
This shouldn't make a significant difference in terms of allocs,
but at least the map is less prone to confusion with other object types.
To ensure the new code doesn't trip up on non-variables, we add test cases.

Finally, for the sake of clarity, index into the types.Info maps like
Defs and Uses rather than calling ObjectOf if we know whether the
identifier we have is a definition of a name or the use of a defined name.
This isn't better in terms of performance, as ObjectOf is a tiny method,
but just like with linkerVariableStrings before, the new code is clearer.

											
										
										
											2 years ago
+											return
 										}
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+										done[used] = true
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+										// If we have a generic struct like
 										//
 										//	type Foo[T any] struct { Bar T }
 										//
 										// then we want the hashing to use the original "Bar T",
 										// because otherwise different instances like "Bar int" and "Bar bool"
 										// will result in different hashes and the field names will break.
 										// Ensure we record the original generic struct, if there is one.
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+										recordType(used.Underlying(), used.Origin().Underlying(), done, fieldToStruct)
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+									case *types.Struct:
 										origin := origin.(*types.Struct)
-												start using some Go 1.22 features

We no longer need to worry about the scope of range variables,
we can iterate over integers directly, and we can use cmp.Or too.

I haven't paid close attention to using these everywhere.
This is mainly testing out the new features where I saw some benefit.

											
										
										
											4 months ago
+										for i := range used.NumFields() {
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+											field := used.Field(i)
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+											fieldToStruct[field] = origin
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
 											if field.Embedded() {
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+												recordType(field.Type(), origin.Field(i).Type(), done, fieldToStruct)
-												fix hashing of generic field names

Trying to make Go master work, I noticed that crypto/tls still failed to
build. The reason was generic structs; we would badly obfuscate their
field names when the types are instantiated:

	> garble build
	[stderr]
	# test/main
	Z4ZpcbMj.go:4: unknown field 'FOpszkrN' in struct literal of type SYdpWfK5[string]
	Z4ZpcbMj.go:5: m8hLTotb.FypXrbTd undefined (type SYdpWfK5[string] has no field or method FypXrbTd)
	exit status 2

See the added comment for what happened and how we fixed it. And add tests.

											
										
										
											2 years ago
+											}
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+										}
 									}
-												introduce a receiver type to transform a Go package

Means that we no longer have to pass a dozen parameters around, mainly
to transformGo. We can also start documenting what each of the fields
actually does, and group them better.

While at it, pkgPath and pkgScope can both be replaced by a
*types.Package, since they're both accessible via trivially cheap
methods.

											
										
										
											4 years ago
+								}
-												ensure that all imports are used after obfuscation

garble's -literals flag and its patching of the runtime may leave unused imports.
We used to try to detect those and remove the imports,
but that was still buggy with edge cases like dot imports or renamed imports.

Moreover, it was potentially incorrect.
Completely removing an import from a package means we don't run its init funcs,
which could have side effects changing the behavior of a program.
As an example, database/sql drivers are registered at init time.

Instead, for each import in an obfuscated Go file,
add an unnamed declaration which references the imported package.
This may not be necessary for all imported packages,
as only a minority become unused due to garble,
but it's also relatively harmless to do so.

Fixes #658.
											
										
										
											1 year ago
+								// isSafeForInstanceType returns true if the passed type is safe for var declaration.
 								// Unsafe types: generic types and non-method interfaces.
 								func isSafeForInstanceType(typ types.Type) bool {
 									switch t := typ.(type) {
 									case *types.Named:
 										if t.TypeParams().Len() > 0 {
 											return false
-												add support for obfuscation of dot-imported string constants


											
										
										
											1 year ago
+										}
-												ensure that all imports are used after obfuscation

garble's -literals flag and its patching of the runtime may leave unused imports.
We used to try to detect those and remove the imports,
but that was still buggy with edge cases like dot imports or renamed imports.

Moreover, it was potentially incorrect.
Completely removing an import from a package means we don't run its init funcs,
which could have side effects changing the behavior of a program.
As an example, database/sql drivers are registered at init time.

Instead, for each import in an obfuscated Go file,
add an unnamed declaration which references the imported package.
This may not be necessary for all imported packages,
as only a minority become unused due to garble,
but it's also relatively harmless to do so.

Fixes #658.
											
										
										
											1 year ago
+										return isSafeForInstanceType(t.Underlying())
 									case *types.Signature:
 										return t.TypeParams().Len() == 0
 									case *types.Interface:
 										return t.IsMethodSet()
 									}
 									return true
 								}
-												Detect unnecessary imports instead of hardcoding

											
										
										
											2 years ago
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+								func (tf *transformer) useAllImports(file *ast.File) {
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									if tf.usedAllImportsFiles == nil {
 										tf.usedAllImportsFiles = make(map[*ast.File]bool)
 									} else if ok := tf.usedAllImportsFiles[file]; ok {
 										return
 									}
 									tf.usedAllImportsFiles[file] = true
-												Detect unnecessary imports instead of hardcoding

											
										
										
											2 years ago
+									for _, imp := range file.Imports {
-												add support for obfuscation of dot-imported string constants


											
										
										
											1 year ago
+										if imp.Name != nil && imp.Name.Name == "_" {
-												Detect unnecessary imports instead of hardcoding

											
										
										
											2 years ago
+											continue
 										}
-												use types.Info.PkgNameOf

It accomplishes the same Implicits/Defs logic we were doing here.

											
										
										
											4 months ago
+										pkgName := tf.info.PkgNameOf(imp)
 										pkgScope := pkgName.Imported().Scope()
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+										var nameObj types.Object
 										for _, name := range pkgScope.Names() {
 											if obj := pkgScope.Lookup(name); obj.Exported() && isSafeForInstanceType(obj.Type()) {
 												nameObj = obj
 												break
-												ensure that all imports are used after obfuscation

garble's -literals flag and its patching of the runtime may leave unused imports.
We used to try to detect those and remove the imports,
but that was still buggy with edge cases like dot imports or renamed imports.

Moreover, it was potentially incorrect.
Completely removing an import from a package means we don't run its init funcs,
which could have side effects changing the behavior of a program.
As an example, database/sql drivers are registered at init time.

Instead, for each import in an obfuscated Go file,
add an unnamed declaration which references the imported package.
This may not be necessary for all imported packages,
as only a minority become unused due to garble,
but it's also relatively harmless to do so.

Fixes #658.
											
										
										
											1 year ago
+											}
-												Detect unnecessary imports instead of hardcoding

											
										
										
											2 years ago
+										}
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+										if nameObj == nil {
-												ensure that all imports are used after obfuscation

garble's -literals flag and its patching of the runtime may leave unused imports.
We used to try to detect those and remove the imports,
but that was still buggy with edge cases like dot imports or renamed imports.

Moreover, it was potentially incorrect.
Completely removing an import from a package means we don't run its init funcs,
which could have side effects changing the behavior of a program.
As an example, database/sql drivers are registered at init time.

Instead, for each import in an obfuscated Go file,
add an unnamed declaration which references the imported package.
This may not be necessary for all imported packages,
as only a minority become unused due to garble,
but it's also relatively harmless to do so.

Fixes #658.
											
										
										
											1 year ago
+											// A very unlikely situation where there is no suitable declaration for a reference variable
 											// and almost certainly means that there is another import reference in code.
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+											continue
-												ensure that all imports are used after obfuscation

garble's -literals flag and its patching of the runtime may leave unused imports.
We used to try to detect those and remove the imports,
but that was still buggy with edge cases like dot imports or renamed imports.

Moreover, it was potentially incorrect.
Completely removing an import from a package means we don't run its init funcs,
which could have side effects changing the behavior of a program.
As an example, database/sql drivers are registered at init time.

Instead, for each import in an obfuscated Go file,
add an unnamed declaration which references the imported package.
This may not be necessary for all imported packages,
as only a minority become unused due to garble,
but it's also relatively harmless to do so.

Fixes #658.
											
										
										
											1 year ago
+										}
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+										spec := &ast.ValueSpec{Names: []*ast.Ident{ast.NewIdent("_")}}
 										decl := &ast.GenDecl{Specs: []ast.Spec{spec}}
 										nameIdent := ast.NewIdent(nameObj.Name())
 										var nameExpr ast.Expr
 										switch {
 										case imp.Name == nil: // import "pkg/path"
 											nameExpr = &ast.SelectorExpr{
-												use types.Info.PkgNameOf

It accomplishes the same Implicits/Defs logic we were doing here.

											
										
										
											4 months ago
+												X:   ast.NewIdent(pkgName.Name()),
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+												Sel: nameIdent,
 											}
 										case imp.Name.Name != ".": // import path2 "pkg/path"
 											nameExpr = &ast.SelectorExpr{
 												X:   ast.NewIdent(imp.Name.Name),
 												Sel: nameIdent,
 											}
 										default: // import . "pkg/path"
 											nameExpr = nameIdent
 										}
 										switch nameObj.(type) {
 										case *types.Const:
 											// const _ = <value>
 											decl.Tok = token.CONST
 											spec.Values = []ast.Expr{nameExpr}
 										case *types.Var, *types.Func:
 											// var _ = <value>
 											decl.Tok = token.VAR
 											spec.Values = []ast.Expr{nameExpr}
 										case *types.TypeName:
 											// var _ <type>
 											decl.Tok = token.VAR
 											spec.Type = nameExpr
 										default:
 											continue // skip *types.Builtin and others
 										}
 										// Ensure that types.Info.Uses is up to date.
 										tf.info.Uses[nameIdent] = nameObj
 										file.Decls = append(file.Decls, decl)
-												Detect unnecessary imports instead of hardcoding

											
										
										
											2 years ago
+									}
 								}
-												skip over comments when obfuscating assembly

github.com/bytedance/sonic has many comments in its assembly files,
and one in particular caused us trouble:

	internal/rt/asm_amd64.s:2:// Code generated by asm2asm, DO NOT EDIT·

Since we looked for "middle dot" characters anywhere,
we would try to load the package "EDIT", which clearly does not exist.

To fix that, start skipping over comments. Thankfully,
Go's assembly syntax only supports "//" line comments,
so it's enough to read one line at a time and skip them quickly.
We also longer need a regular expression to find #include lines.

While here, two other minor bits of cleanup.
First, rename transformGo to transformGoFile, for clarity.
Second, use ".s" extensions for obfuscated assembly filenames,
just like we do with the comiler and ".go" files.

Updates #621.

											
										
										
											1 year ago
+								// transformGoFile obfuscates the provided Go syntax file.
 								func (tf *transformer) transformGoFile(file *ast.File) *ast.File {
-												only obfuscate literals in packages to obfuscate

Add a regression test in gogarble.txt,
as that test is already set up with packages to not obfuscate.

This bug manifested in the form of a build failure for GOOS=plan9
with -literals turned on:

	[...]/os/file_plan9.go:151:12: invalid operation: cannot call non-function append (variable of type bool)

In this case, the "os" package is not to be obfuscated,
but we would still obfuscate its literals as per the bug.

But, since the package's identifiers were not obfuscated,
names like "append" were not replaced as per ea2e0bdf71,
meaning that the shadowing would still affect us.

Fixes #417.

											
										
										
											3 years ago
+									// Only obfuscate the literals here if the flag is on
 									// and if the package in question is to be obfuscated.
-												support GOGARBLE=* with -literals again

We recently made an important change when obfuscating the runtime,
so that if it's missing any linkname packages in ListedPackages,
it does an extra "go list" call to obtain their information.

This works very well, but we missed an edge case.
In main.go, we disable flagLiterals for the runtime package,
but not for other packages like sync/atomic.

And, since the runtime's extra "go list" has to compute GarbleActionIDs,
it uses the list of garble flags via appendFlags.
Unfortunately, it thinks "-literals" isn't set, when it is,
and the other packages see it as being set.

This discrepancy results in link time errors,
as each end of the linkname obfuscates with a different hash:

	> garble -literals build
	[stderr]
	# test/main
	jccGkbFG.(*yijmzGHo).String: relocation target jccGkbFG.e_77sflf not defined
	jQg9GEkg.(*NLxfRPAP).pB5p2ZP0: relocation target jQg9GEkg.ce66Fmzl not defined
	jQg9GEkg.(*NLxfRPAP).pB5p2ZP0: relocation target jQg9GEkg.e5kPa1qY not defined
	jQg9GEkg.(*NLxfRPAP).pB5p2ZP0: relocation target jQg9GEkg.aQ_3sL3Q not defined
	jQg9GEkg.(*NLxfRPAP).pB5p2ZP0: relocation target jQg9GEkg.zls3wmws not defined
	jQg9GEkg.(*NLxfRPAP).pB5p2ZP0: relocation target jQg9GEkg.g69WgKIS not defined

To fix the problem, treat flagLiterals as read-only after flag.Parse,
just like we already do with the other flags except flagDebugDir.
The code that turned flagLiterals to false is no longer needed,
as literals.Obfuscate is only called when ToObfuscate is true,
and ToObfuscate is false for runtimeAndDeps already.

											
										
										
											2 years ago
+									//
 									// We can't obfuscate literals in the runtime and its dependencies,
 									// because obfuscated literals sometimes escape to heap,
 									// and that's not allowed in the runtime itself.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+									if flagLiterals && tf.curPkg.ToObfuscate {
 										file = literals.Obfuscate(tf.obfRand, file, tf.info, tf.linkerVariableStrings)
-												Remove unused imports via go/types.

Fixes #481

											
										
										
											2 years ago
 										// some imported constants might not be needed anymore, remove unnecessary imports
-												reduce verbosity in the last change

Declare the ast.GenDecl node once,
which allows us to deduplicate and inline code.
resolveImportSpec doesn't need to be separate either.

We also don't need explicit panics for unexpected nils;
we can rely on nil checks inserted by the compiler.

Finally, move the bulk of the code outside of the scope.Names loop,
which unindents most of the logic.

While here, add a few more inline docs.

											
										
										
											1 year ago
+										tf.useAllImports(file)
-												obfuscate literals as part of transformGo (#299)

This is easier to understand, since now the modification of the
*ast.File is all within a single chunk of code. We can also simplify
literals.Obfuscate to work on a single file, as transformGo runs in a
loop.

We also remove the "use receiver" TODOs, since the code is now in a
different package and it can't declare methods on a type here.
											
										
										
											3 years ago
+									}
-												support type switches with symbolic vars

											
										
										
											5 years ago
+									pre := func(cursor *astutil.Cursor) bool {
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+										node, ok := cursor.Node().(*ast.Ident)
 										if !ok {
 											return true
 										}
-												implement TODO to use a name variable

Slightly simplifies the main chunk of code.
While here, improve the wording for when Go isn't installed correctly.

											
										
										
											2 years ago
+										name := node.Name
 										if name == "_" {
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+											return true // unnamed remains unnamed
 										}
-												introduce a receiver type to transform a Go package

Means that we no longer have to pass a dozen parameters around, mainly
to transformGo. We can also start documenting what each of the fields
actually does, and group them better.

While at it, pkgPath and pkgScope can both be replaced by a
*types.Package, since they're both accessible via trivially cheap
methods.

											
										
										
											4 years ago
+										obj := tf.info.ObjectOf(node)
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+										if obj == nil {
-												obfuscate all variable names, even local ones (#420)

In the added test case, "garble -literals build" would fail:

	--- FAIL: TestScripts/literals (8.29s)
		testscript.go:397:
			> env GOPRIVATE=test/main
			> garble -literals build
			[stderr]
			# test/main
			Usz1FmFm.go:1: cannot call non-function string (type int), declared at Usz1FmFm.go:1
			Usz1FmFm.go:1: string is not a type
			Usz1FmFm.go:1: cannot call non-function append (type int), declared at Usz1FmFm.go:1

That is, for input code such as:

	var append int
	println("foo")
	_ = append

We'd end up with obfuscated code like:

	var append int
	println(func() string {
		// obfuscation...
		x = append(x, ...)
		// obfuscation...
		return string(x)
	})
	_ = append

Which would then break, as the code is shadowing the "append" builtin.
To work around this, always obfuscate variable names, so we end up with:

	var mwu1xuNz int
	println(func() string {
		// obfuscation...
		x = append(x, ...)
		// obfuscation...
		return string(x)
	})
	_ = mwu1xuNz

This change shouldn't make the quality of our obfuscation stronger,
as local variable names do not currently end up in Go binaries.
However, this does make garble more consistent in treating identifiers,
and it completely avoids any issues related to shadowing builtins.

Moreover, this also paves the way for publishing obfuscated source code,
such as #369.

Fixes #417.
											
										
										
											3 years ago
+											_, isImplicit := tf.info.Defs[node]
 											_, parentIsFile := cursor.Parent().(*ast.File)
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
+											if !isImplicit || parentIsFile {
 												// We only care about nil objects in the switch scenario below.
-												obfuscate all variable names, even local ones (#420)

In the added test case, "garble -literals build" would fail:

	--- FAIL: TestScripts/literals (8.29s)
		testscript.go:397:
			> env GOPRIVATE=test/main
			> garble -literals build
			[stderr]
			# test/main
			Usz1FmFm.go:1: cannot call non-function string (type int), declared at Usz1FmFm.go:1
			Usz1FmFm.go:1: string is not a type
			Usz1FmFm.go:1: cannot call non-function append (type int), declared at Usz1FmFm.go:1

That is, for input code such as:

	var append int
	println("foo")
	_ = append

We'd end up with obfuscated code like:

	var append int
	println(func() string {
		// obfuscation...
		x = append(x, ...)
		// obfuscation...
		return string(x)
	})
	_ = append

Which would then break, as the code is shadowing the "append" builtin.
To work around this, always obfuscate variable names, so we end up with:

	var mwu1xuNz int
	println(func() string {
		// obfuscation...
		x = append(x, ...)
		// obfuscation...
		return string(x)
	})
	_ = mwu1xuNz

This change shouldn't make the quality of our obfuscation stronger,
as local variable names do not currently end up in Go binaries.
However, this does make garble more consistent in treating identifiers,
and it completely avoids any issues related to shadowing builtins.

Moreover, this also paves the way for publishing obfuscated source code,
such as #369.

Fixes #417.
											
										
										
											3 years ago
+												return true
 											}
-												minor code tidying up

We don't need to nest the RemoveAll and MkdirAll calls for debugdir.

Fill the string for -toolexec= when we actually append it to goArgs.

Consistently use the objectString type alias.

Remove unnecessary parameter names in transformFuncs.

Unindent the code for switch variables by reversing the conditional.

											
										
										
											2 years ago
+											// In a type switch like "switch foo := bar.(type) {",
 											// "foo" is being declared as a symbolic variable,
 											// as it is only actually declared in each "case SomeType:".
 											//
 											// As such, the symbolic "foo" in the syntax tree has no object,
 											// but it is still recorded under Defs with a nil value.
 											// We still want to obfuscate that syntax tree identifier,
 											// so if we detect the case, create a dummy types.Var for it.
 											//
 											// Note that "package mypkg" also denotes a nil object in Defs,
 											// and we don't want to treat that "mypkg" as a variable,
 											// so avoid that case by checking the type of cursor.Parent.
 											obj = types.NewVar(node.Pos(), tf.pkg, name, nil)
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+										}
 										pkg := obj.Pkg()
-												first version of plugins working

Add a caveat about -trimpath too.

Fixes #18.

											
										
										
											4 years ago
+										if vr, ok := obj.(*types.Var); ok && vr.Embedded() {
-												obfuscate alias names like any other objects

Before this change, we would try to never obfuscate alias names. That
was far from ideal, as they can end up in field names via anonymous
fields.

Even then, we would sometimes still fail to build, because we would
inconsistently obfuscate alias names. For example, in the added test
case:

	--- FAIL: TestScripts/syntax (0.23s)
	    testscript.go:397:
	        > env GOPRIVATE='test/main,private.source'
	        > garble build
	        [stderr]
	        # test/main/sub
	        Lv_a8gRD.go:15: undefined: KCvSpxmQ

To fix this problem, we set obj to be the TypeName corresponding to the
alias when it is used as an embedded field. We can then make the right
choice when obfuscating the name.

Right now, all aliases will be obfuscated. A TODO exists about not
obfuscating alias names when they're used as embedded fields in a struct
type in the same package, and that package is used for reflection -
since then, the alias name ends up as the field name.

With these changes, the protobuf module now builds.

											
										
										
											3 years ago
+											// The docs for ObjectOf say:
 											//
 											//     If id is an embedded struct field, ObjectOf returns the
 											//     field (*Var) it defines, not the type (*TypeName) it uses.
 											//
 											// If this embedded field is a type alias, we want to
-												support aliases as embedded fields in dependencies

Our recent work in fieldToAlias worked well when the embedded field
declaration (using an alias) was in the same package as the use of that
field. We would have the *ast.Ident for the field declaration, so
types.Info.Uses would give us the TypeName for the alias.

Unfortunately, if the declaration was in a dependency package, we did
not have that same *ast.Ident, as we weren't parsing the source code for
dependencies for type-checking. This resulted in us incorrectly
obfuscating the use of such an embedded field:

	> garble build
	[stderr]
	# test/main
	JtzmzxWf.go:4: unknown field 'ExternalForeignAlias' in struct literal of type _BdSNiEL.Vcs_smer

To fix this, look through the direct imports of the package defining the
field to find an alias under the exact same name. Not a foolproof
solution, as there's a TODO, but it should work for most cases.

Fixes the obfuscation of google.golang.org/grpc/internal/status, too.

Updates #349.

											
										
										
											3 years ago
+											// handle the alias's TypeName instead of treating it as
 											// the type the alias points to.
-												obfuscate alias names like any other objects

Before this change, we would try to never obfuscate alias names. That
was far from ideal, as they can end up in field names via anonymous
fields.

Even then, we would sometimes still fail to build, because we would
inconsistently obfuscate alias names. For example, in the added test
case:

	--- FAIL: TestScripts/syntax (0.23s)
	    testscript.go:397:
	        > env GOPRIVATE='test/main,private.source'
	        > garble build
	        [stderr]
	        # test/main/sub
	        Lv_a8gRD.go:15: undefined: KCvSpxmQ

To fix this problem, we set obj to be the TypeName corresponding to the
alias when it is used as an embedded field. We can then make the right
choice when obfuscating the name.

Right now, all aliases will be obfuscated. A TODO exists about not
obfuscating alias names when they're used as embedded fields in a struct
type in the same package, and that package is used for reflection -
since then, the alias name ends up as the field name.

With these changes, the protobuf module now builds.

											
										
										
											3 years ago
+											//
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+											// Alternatively, if we don't have an alias, we still want to
-												obfuscate alias names like any other objects

Before this change, we would try to never obfuscate alias names. That
was far from ideal, as they can end up in field names via anonymous
fields.

Even then, we would sometimes still fail to build, because we would
inconsistently obfuscate alias names. For example, in the added test
case:

	--- FAIL: TestScripts/syntax (0.23s)
	    testscript.go:397:
	        > env GOPRIVATE='test/main,private.source'
	        > garble build
	        [stderr]
	        # test/main/sub
	        Lv_a8gRD.go:15: undefined: KCvSpxmQ

To fix this problem, we set obj to be the TypeName corresponding to the
alias when it is used as an embedded field. We can then make the right
choice when obfuscating the name.

Right now, all aliases will be obfuscated. A TODO exists about not
obfuscating alias names when they're used as embedded fields in a struct
type in the same package, and that package is used for reflection -
since then, the alias name ends up as the field name.

With these changes, the protobuf module now builds.

											
										
										
											3 years ago
+											// use the embedded type, not the field.
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+											vrStr := recordedObjectString(vr)
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+											aliasTypeName, ok := tf.curPkgCache.EmbeddedAliasFields[vrStr]
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+											if ok {
-												avoid panic when embedding a builtin alias

TypeName.Pkg is documented as:

    Pkg returns the package to which the object belongs.
    The result is nil for labels and objects in the Universe scope.

When a struct type embeds a builtin alias type, such as byte,
this would lead to a panic since we assumed we could use the Pkg method.

Fixes #798.

											
										
										
											7 months ago
+												aliasScope := tf.pkg.Scope()
 												if path := aliasTypeName.PkgPath; path == "" {
 													aliasScope = types.Universe
 												} else if path != tf.pkg.Path() {
-												remove TODO for tf.pkg.Imports

I tried to fix the TODO, but ran into the problem described by the added
documentation - that some packages in the import graph are incomplete,
as go/types was clever and didn't fully load them.

While here, also make the panics a bit more descriptive,
which helped me debug what was going wrong after the attempted refactor.

											
										
										
											2 years ago
+													// If the package is a dependency, import it.
 													// We can't grab the package via tf.pkg.Imports,
 													// because some of the packages under there are incomplete.
 													// ImportFrom will cache complete imports, anyway.
-												avoid panic when embedding a builtin alias

TypeName.Pkg is documented as:

    Pkg returns the package to which the object belongs.
    The result is nil for labels and objects in the Universe scope.

When a struct type embeds a builtin alias type, such as byte,
this would lead to a panic since we assumed we could use the Pkg method.

Fixes #798.

											
										
										
											7 months ago
+													pkg2, err := tf.origImporter.ImportFrom(path, parentWorkDir, 0)
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+													if err != nil {
 														panic(err)
 													}
-												avoid panic when embedding a builtin alias

TypeName.Pkg is documented as:

    Pkg returns the package to which the object belongs.
    The result is nil for labels and objects in the Universe scope.

When a struct type embeds a builtin alias type, such as byte,
this would lead to a panic since we assumed we could use the Pkg method.

Fixes #798.

											
										
										
											7 months ago
+													aliasScope = pkg2.Scope()
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+												}
-												avoid panic when embedding a builtin alias

TypeName.Pkg is documented as:

    Pkg returns the package to which the object belongs.
    The result is nil for labels and objects in the Universe scope.

When a struct type embeds a builtin alias type, such as byte,
this would lead to a panic since we assumed we could use the Pkg method.

Fixes #798.

											
										
										
											7 months ago
+												tname, ok := aliasScope.Lookup(aliasTypeName.Name).(*types.TypeName)
-												simplify types.TypeName panic logic

The logic

	if X || Y {
		if X {
			<X panic>
		}
		<Y panic>
	}

is equivalent to, and easier to read as:

	if X {
		<X panic>
	}
	if Y {
		<Y panic>
	}

It probably evolved unintentionally to be this way.

											
										
										
											1 year ago
+												if !ok {
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+													panic(fmt.Sprintf("EmbeddedAliasFields pointed %q to a missing type %q", vrStr, aliasTypeName))
-												simplify types.TypeName panic logic

The logic

	if X || Y {
		if X {
			<X panic>
		}
		<Y panic>
	}

is equivalent to, and easier to read as:

	if X {
		<X panic>
	}
	if Y {
		<Y panic>
	}

It probably evolved unintentionally to be this way.

											
										
										
											1 year ago
+												}
 												if !tname.IsAlias() {
-												declare a type for cachedOutput

To properly make our cache robust, we'll need to be able to compute
cache entries for dependencies as needed if they are missing.
So we'll need to create more of these struct values in the code.

Rename cachedOutput to curPkgCache, to clarify that it relates
to the current package.

While here, remove the "known" prefix on all pkgCache fields.
All of the names still make perfect sense without it.

											
										
										
											1 year ago
+													panic(fmt.Sprintf("EmbeddedAliasFields pointed %q to a non-alias type %q", vrStr, aliasTypeName))
-												handle aliases to foreign named types properly

When such an alias name was used to define an embedded field, we handled
that case gracefully via the code using:

	tf.info.Uses[node].(*types.TypeName)

Unfortunately, when the same field name was used elsewhere, such as a
composite literal, tf.Info.Uses gave us a *types.Var, not a
*types.TypeName, meaning we could no longer tell if this was an alias,
or what it pointed to.

Thus, we failed to obfuscate the name properly in the added test case:

	> garble build
	[stderr]
	# test/main/sub
	xxWZf66u.go:36: unknown field 'foreignAlias' in struct literal of type smhWelwn

It doesn't seem like any of the go/types APIs allows us to obtain the
*types.TypeName directly in this scenario. Thus, use a trick that we
used before: after typechecking, but before obfuscating, record all
embedded struct field *types.Var which are aliases via a map, where the
value holds the *types.TypeName for the alias.

Updates #349.

											
										
										
											3 years ago
+												}
-												obfuscate alias names like any other objects

Before this change, we would try to never obfuscate alias names. That
was far from ideal, as they can end up in field names via anonymous
fields.

Even then, we would sometimes still fail to build, because we would
inconsistently obfuscate alias names. For example, in the added test
case:

	--- FAIL: TestScripts/syntax (0.23s)
	    testscript.go:397:
	        > env GOPRIVATE='test/main,private.source'
	        > garble build
	        [stderr]
	        # test/main/sub
	        Lv_a8gRD.go:15: undefined: KCvSpxmQ

To fix this problem, we set obj to be the TypeName corresponding to the
alias when it is used as an embedded field. We can then make the right
choice when obfuscating the name.

Right now, all aliases will be obfuscated. A TODO exists about not
obfuscating alias names when they're used as embedded fields in a struct
type in the same package, and that package is used for reflection -
since then, the alias name ends up as the field name.

With these changes, the protobuf module now builds.

											
										
										
											3 years ago
+												obj = tname
 											} else {
 												named := namedType(obj.Type())
 												if named == nil {
 													return true // unnamed type (probably a basic type, e.g. int)
 												}
-												properly record when type aliases are embedded as fields

There are two scenarios when it comes to embedding fields.
The first is easy, and we always handled it well:

	type Named struct { Foo int }

	type T struct { Named }

In this scenario, T ends up with an embedded field named "Named",
and a promoted field named "Foo".

Then there's the form with a type alias:

	type Named struct { Foo int }

	type Alias = Named

	type T struct { Alias }

This case is different: T ends up with an embedded field named "Alias",
and a promoted field named "Foo".
Note how the field gets its name from the referenced type,
even if said type is just an alias to another type.

This poses two problems.
First, we must obfuscate the field T.Alias as the name "Alias",
and not as the name "Named" that the alias points to.
Second, we must be careful of cases where Named and Alias are declared
in different packages, as they will obfuscate the same name differently.

Both of those problems compounded in the reported issue.
The actual reason is that quic-go has a type alias in the form of:

	type ConnectionState = qtls.ConnectionState

In other words, the entire problem boils down to a type alias which
points to a named type in a different package, where both types share
the same name. For example:

	package parent

	import "parent/p1"

	type T struct { p1.SameName }

	[...]

	package p1

	import "parent/p2"

	type SameName = p2.SameName

	[...]

	package p2

	type SameName struct { Foo int }

This broke garble because we had a heuristic to detect when an embedded
field was a type alias:

	// Instead, detect such a "foreign alias embed".
	// If we embed a final named type,
	// but the field name does not match its name,
	// then it must have been done via an alias.
	// We dig out the alias's TypeName via locateForeignAlias.
	if named.Obj().Name() != node.Name {

As the reader can deduce, this heuristic would incorrectly assume that
the snippet above does not embed a type alias, when in fact it does.
When obfuscating the field T.SameName, which uses a type alias,
we would correctly obfuscate the name "SameName",
but we would incorrectly obfuscate it with the package p2, not p1.
This would then result in build errors.

To fix this problem for good, we need to get rid of the heuristic.
Instead, we now mimic what was done for KnownCannotObfuscate,
but for embedded fields which use type aliases.
KnownEmbeddedAliasFields is now filled for each package
and stored in the cache as part of cachedOutput.
We can then detect the "embedded alias" case reliably,
even when the field is declared in an imported package.

On the plus side, we get to remove locateForeignAlias.
We also add a couple of TODOs to record further improvements.
Finally, add a test.

Fixes #466.

											
										
										
											2 years ago
+												obj = named.Obj()
-												handle embedded struct fields with universe scope

Whilst it may not be particularly common, it is legal to embed fields
where the type has universe scope (e.g. int, error, etc). This can
cause a panic in 2 difference places:

- When embedding `error`, a named type is resolved but the package is
nil. The call to `pkg.Name()` results in a panic
- When embedding a basic type such as `int`, no named type is resolved
at all. The call to `namedType(obj.Type()).Obj()` results in a panic

I'm assuming it is OK to return early when a named type cannot be
resolved.. we could let it continue but I think `pkg` should be set to
nil to be correct, so it'd end up returning straight away anyway.
											
										
										
											4 years ago
+											}
-												first version of plugins working

Add a caveat about -trimpath too.

Fixes #18.

											
										
										
											4 years ago
+											pkg = obj.Pkg()
 										}
-												handle embedded struct fields with universe scope

Whilst it may not be particularly common, it is legal to embed fields
where the type has universe scope (e.g. int, error, etc). This can
cause a panic in 2 difference places:

- When embedding `error`, a named type is resolved but the package is
nil. The call to `pkg.Name()` results in a panic
- When embedding a basic type such as `int`, no named type is resolved
at all. The call to `namedType(obj.Type()).Obj()` results in a panic

I'm assuming it is OK to return early when a named type cannot be
resolved.. we could let it continue but I think `pkg` should be set to
nil to be correct, so it'd end up returning straight away anyway.
											
										
										
											4 years ago
+										if pkg == nil {
 											return true // universe scope
 										}
-												first version of plugins working

Add a caveat about -trimpath too.

Fixes #18.

											
										
										
											4 years ago
-												avoid breaking github.com/davecgh/go-spew

It assumes that reflect.Value has a field named "flag",
which wasn't the case with obfuscated builds as we obfuscated it.

We already treated the reflect package as special,
for instance when not obfuscating Method or MethodByName.
In a similar fashion, mark reflect's rtype and Value types to not be
obfuscated alongside their field names. Note that rtype is the
implementation behind the reflect.Type interface.

This fix is fairly manual and repetitive.
transformCompile, transformLinkname, and transformAsm should all
use the same mechanism to tell if names should be obfuscated.
However, they do not do that right now, and that refactor feels too
risky for a bugfix release. We add more TODOs instead.

We're not adding go-spew to scripts/check-third-party.sh since the
project is largely abandoned. It's not even a Go module yet.
The only broken bit from it is what we've added to our tests.

Fixes #676.

											
										
										
											1 year ago
+										// TODO: We match by object name here, which is actually imprecise.
 										// For example, in package embed we match the type FS, but we would also
 										// match any field or method named FS.
-												isolate reflect.go from updating globals directly

That is, stop reusing "transformer" as the receiver on methods,
and stop writing the results to the global curPkgCache struct.

Soon we will need to support computing pkgCache for any dependency,
not just the current package, to make the caching properly robust.
This allows us to fill reflectInspector with different values.

The explicit isolation also helps prevent bugs.
For instance, we were calling recursivelyRecordAsNotObfuscated from
transformCompile, which happens after we have loaded or saved pkgCache.
Meaning, the current package sees a larger pkgCache than its dependents.
In this particular case it wasn't causing any bugs,
since the two reflect types in question only had unexported fields,
but it's still good to treat pkgCache as read-only in transformCompile.

											
										
										
											1 year ago
+										// Can we instead use an object map like ReflectObjects?
-												avoid breaking intrinsics when obfuscating names

We obfuscate import paths as well as their declared names.
The compiler treats some packages and APIs in special ways,
and the way it detects those is by looking at import paths and names.

In the past, we have avoided obfuscating some names like embed.FS or
reflect.Value.MethodByName for this reason. Otherwise,
go:embed or the linker's deadcode elimination might be broken.

This matching by path and name also happens with compiler intrinsics.
Intrinsics allow the compiler to rewrite some standard library calls
with small and efficient assembly, depending on the target GOARCH.
For example, math/bits.TrailingZeros32 gets replaced with ssa.OpCtz32,
which on amd64 may result in using the TZCNTL instruction.

We never noticed that we were breaking many of these intrinsics.
The intrinsics for funcs declared in the runtime and its dependencies
still worked properly, as we do not obfuscate those packages yet.
However, for other packages like math/bits and sync/atomic,
the intrinsics were being entirely disabled due to obfuscated names.

Skipping intrinsics is particularly bad for performance,
and it also leads to slightly larger binaries:

			 │      old      │                 new                 │
			 │     bin-B     │     bin-B      vs base              │
	Build-16   5.450Mi ± ∞ ¹   5.333Mi ± ∞ ¹  -2.15% (p=0.029 n=4)

Finally, the main reason we noticed that intrinsics were broken
is that apparently GOARCH=mips fails to link without them,
as some symbols end up being not defined at all.
This patch fixes builds for the MIPS family of architectures.

Rather than building and linking all of std for every GOARCH,
test that intrinsics work by asking the compiler to print which
intrinsics are being applied, and checking that math/bits gets them.

This fix is relatively unfortunate, as it means we stop obfuscating
about 120 function names and a handful of package paths.
However, fixing builds and intrinsics is much more important.
We can figure out better ways to deal with intrinsics in the future.

Fixes #646.

											
										
										
											1 year ago
+										path := pkg.Path()
 										switch path {
-												ensure the alignment of sync/atomic types works

Added in Go 1.19, types like sync/atomic.Uint64 are handy,
because they ensure proper alignment even on 32-bit GOOSes.
However, this was done via a magic `type align64 struct{}`,
which the compiler spotted by name.

To keep that magic working, do not obfuscate the name.
Neither package path was being obfuscated,
as both packages contain compiler intrinsics already.

Fixes #686.

											
										
										
											1 year ago
+										case "sync/atomic", "runtime/internal/atomic":
 											if name == "align64" {
 												return true
 											}
-												avoid reflect method call panics with GOGARBLE=*

We were obfuscating reflect's package path and its declared names,
but the toolchain wants to detect the presence of method reflection
to turn down the aggressiveness of dead code elimination.

Given that the obfuscation broke the detection,
we could easily end up in crashes when making reflect calls:

	fatal error: unreachable method called. linker bug?

	goroutine 1 [running]:
	runtime.throw({0x50c9b3?, 0x2?})
		runtime/panic.go:1047 +0x5d fp=0xc000063660 sp=0xc000063630 pc=0x43245d
	runtime.unreachableMethod()
		runtime/iface.go:532 +0x25 fp=0xc000063680 sp=0xc000063660 pc=0x40a845
	runtime.call16(0xc00010a360, 0xc00000e0a8, 0x0, 0x0, 0x0, 0x8, 0xc000063bb0)
		runtime/wcS9OpRFL:728 +0x49 fp=0xc0000636a0 sp=0xc000063680 pc=0x45eae9
	runtime.reflectcall(0xc00001c120?, 0x1?, 0x1?, 0x18110?, 0xc0?, 0x1?, 0x1?)
		<autogenerated>:1 +0x3c fp=0xc0000636e0 sp=0xc0000636a0 pc=0x462e9c

Avoid obfuscating the three names which cause problems: "reflect",
"Method", and "MethodByName".

While here, we also teach obfuscatedImportPath to skip "runtime",
as I also saw that the toolchain detects it for many reasons.
That wasn't a problem yet, as we do not obfuscate the runtime,
but it was likely going to become a problem in the future.

											
										
										
											2 years ago
+										case "embed":
 											// FS is detected by the compiler for //go:embed.
-												various minor TODO cleanups

computeLinkerVariableStrinsg had an unusedargument.

Only skip obfuscating the name "FS" in the "embed" package.

The reflect methods no longer use the transformer receiver type,
so that TODO now feels unnecessary. Many methods need to be aware
of what the current types.Package is, and that seems reasonable.

We no longer use writeFileExclusive for our own cache on disk,
so the TODO about using locking or atomic writes is no longer relevant.

											
										
										
											1 year ago
+											if name == "FS" {
 												return true
 											}
-												avoid reflect method call panics with GOGARBLE=*

We were obfuscating reflect's package path and its declared names,
but the toolchain wants to detect the presence of method reflection
to turn down the aggressiveness of dead code elimination.

Given that the obfuscation broke the detection,
we could easily end up in crashes when making reflect calls:

	fatal error: unreachable method called. linker bug?

	goroutine 1 [running]:
	runtime.throw({0x50c9b3?, 0x2?})
		runtime/panic.go:1047 +0x5d fp=0xc000063660 sp=0xc000063630 pc=0x43245d
	runtime.unreachableMethod()
		runtime/iface.go:532 +0x25 fp=0xc000063680 sp=0xc000063660 pc=0x40a845
	runtime.call16(0xc00010a360, 0xc00000e0a8, 0x0, 0x0, 0x0, 0x8, 0xc000063bb0)
		runtime/wcS9OpRFL:728 +0x49 fp=0xc0000636a0 sp=0xc000063680 pc=0x45eae9
	runtime.reflectcall(0xc00001c120?, 0x1?, 0x1?, 0x18110?, 0xc0?, 0x1?, 0x1?)
		<autogenerated>:1 +0x3c fp=0xc0000636e0 sp=0xc0000636a0 pc=0x462e9c

Avoid obfuscating the three names which cause problems: "reflect",
"Method", and "MethodByName".

While here, we also teach obfuscatedImportPath to skip "runtime",
as I also saw that the toolchain detects it for many reasons.
That wasn't a problem yet, as we do not obfuscate the runtime,
but it was likely going to become a problem in the future.

											
										
										
											2 years ago
+										case "reflect":
-												avoid breaking github.com/davecgh/go-spew

It assumes that reflect.Value has a field named "flag",
which wasn't the case with obfuscated builds as we obfuscated it.

We already treated the reflect package as special,
for instance when not obfuscating Method or MethodByName.
In a similar fashion, mark reflect's rtype and Value types to not be
obfuscated alongside their field names. Note that rtype is the
implementation behind the reflect.Type interface.

This fix is fairly manual and repetitive.
transformCompile, transformLinkname, and transformAsm should all
use the same mechanism to tell if names should be obfuscated.
However, they do not do that right now, and that refactor feels too
risky for a bugfix release. We add more TODOs instead.

We're not adding go-spew to scripts/check-third-party.sh since the
project is largely abandoned. It's not even a Go module yet.
The only broken bit from it is what we've added to our tests.

Fixes #676.

											
										
										
											1 year ago
+											switch name {
-												avoid reflect method call panics with GOGARBLE=*

We were obfuscating reflect's package path and its declared names,
but the toolchain wants to detect the presence of method reflection
to turn down the aggressiveness of dead code elimination.

Given that the obfuscation broke the detection,
we could easily end up in crashes when making reflect calls:

	fatal error: unreachable method called. linker bug?

	goroutine 1 [running]:
	runtime.throw({0x50c9b3?, 0x2?})
		runtime/panic.go:1047 +0x5d fp=0xc000063660 sp=0xc000063630 pc=0x43245d
	runtime.unreachableMethod()
		runtime/iface.go:532 +0x25 fp=0xc000063680 sp=0xc000063660 pc=0x40a845
	runtime.call16(0xc00010a360, 0xc00000e0a8, 0x0, 0x0, 0x0, 0x8, 0xc000063bb0)
		runtime/wcS9OpRFL:728 +0x49 fp=0xc0000636a0 sp=0xc000063680 pc=0x45eae9
	runtime.reflectcall(0xc00001c120?, 0x1?, 0x1?, 0x18110?, 0xc0?, 0x1?, 0x1?)
		<autogenerated>:1 +0x3c fp=0xc0000636e0 sp=0xc0000636a0 pc=0x462e9c

Avoid obfuscating the three names which cause problems: "reflect",
"Method", and "MethodByName".

While here, we also teach obfuscatedImportPath to skip "runtime",
as I also saw that the toolchain detects it for many reasons.
That wasn't a problem yet, as we do not obfuscate the runtime,
but it was likely going to become a problem in the future.

											
										
										
											2 years ago
+											// Per the linker's deadcode.go docs,
 											// the Method and MethodByName methods are what drive the logic.
 											case "Method", "MethodByName":
 												return true
 											}
-												fix encoding/asn1 marshaling of pkix types

See the added comment and test, which failed before the fix when
the encoded certificate was decoded.

It took a while to find the culprit in asn1, but in hindsight it's easy:

	case reflect.Slice:
		if t.Elem().Kind() == reflect.Uint8 {
			return false, TagOctetString, false, true
		}
		if strings.HasSuffix(t.Name(), "SET") {
			return false, TagSet, true, true
		}
		return false, TagSequence, true, true

Fixes #711.

											
										
										
											1 year ago
+										case "crypto/x509/pkix":
 											// For better or worse, encoding/asn1 detects a "SET" suffix on slice type names
 											// to tell whether those slices should be treated as sets or sequences.
 											// Do not obfuscate those names to prevent breaking x509 certificates.
 											// TODO: we can surely do better; ideally propose a non-string-based solution
 											// upstream, or as a fallback, obfuscate to a name ending with "SET".
 											if strings.HasSuffix(name, "SET") {
 												return true
 											}
-												support embedding via embed.FS

We already added support for "//go:embed" with string and []byte,
by not obfuscating the "embed" import path.

However, embed.FS was still failing:

	> garble build
	[stderr]
	# test/main
	:13: go:embed cannot apply to var of type embed.WtKNvwbN

The compiler detects the type by matching its name to exactly "embed.FS",
so don't obfuscate the name "FS" either.

While at it, ensure that the embed code behaves the same with "go build".

Updates #349.

											
										
										
											3 years ago
+										}
-												clarify how each "cannot obfuscate" map works

We used to record all objects in cannotObfuscateNames,
and then we'd add the exported ones to KnownCannotObfuscate.

Instead, teach recordAsNotObfuscated to store each object in either
knownCannotObfuscateUnexported or KnownCannotObfuscate, but not both.
The former isn't cached so it uses in-memory pointers as keys,
and the latter uses the cross-process objectStrings like before.

Functionally, this is all the same, but with the difference that the map
indexed by types.Object will not contain objects already recorded in
KnownCannotObfuscate, reducing the amount of duplicate memory use.

While here, give recordIgnore a less ambiguous name,
and remove the second parameter as it was always tf.pkg.Path().
This also means we can compare *types.Package pointers directly.

Finally, add more TODOs for further improvement ideas.
It does mean that we end up with more TODOs than before,
even though I'm fixing one, but I reckon that's a good thing.
Recording these ideas can give first-time contributors ways to help,
and it ensures I don't forget about ideas just in my head.

											
										
										
											2 years ago
+										// The package that declared this object did not obfuscate it.
-												move curPkgCache out of the global scope

loadPkgCache also uses different pkgCache variables
depending on whether it finds a direct cache hit or not.

Now we only initialize an entirely new pkgCache
with the first two ReflectAPIs entries when we get a direct cache miss,
since a direct cache hit will already load those from the cache.

											
										
										
											1 year ago
+										if usedForReflect(tf.curPkgCache, obj) {
-												stop loading obfuscated type information from deps

If package P1 imports package P2, P1 needs to know which names from P2
weren't obfuscated. For instance, if P2 declares T2 and does
"reflect.TypeOf(T2{...})", then P2 won't obfuscate the name T2, and
neither should P1.

This information should flow from P2 to P1, as P2 builds before
P1. We do this via obfuscatedTypesPackage; P1 loads the type information
of the obfuscated version of P2, and does a lookup for T2. If T2 exists,
then it wasn't obfuscated.

This mechanism has served us well, but it has downsides:

1) It wastes CPU; we load the type information for the entire package.

2) It's complex; for instance, we need KnownObjectFiles as an extra.

3) It makes our code harder to understand, as we load both the original
   and obfuscated type informaiton.

Instead, we now have each package record what names were not obfuscated
as part of its cachedOuput file. Much like KnownObjectFiles, the map
records incrementally through the import graph, to avoid having to load
cachedOutput files for indirect dependencies.

We shouldn't need to worry about those maps getting large;
we only skip obfuscating declared names in a few uncommon scenarios,
such as the use of reflection or cgo's "//export".

Since go/types is relatively allocation-heavy, and the export files
contain a lot of data, we get a nice speed-up:

	name      old time/op         new time/op         delta
	Build-16          11.5s ± 2%          11.1s ± 3%  -3.77%  (p=0.008 n=5+5)

	name      old bin-B           new bin-B           delta
	Build-16          5.15M ± 0%          5.15M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          375ms ± 3%          341ms ± 6%  -8.96%  (p=0.008 n=5+5)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          283ms ±17%          289ms ±13%    ~     (p=0.841 n=5+5)

	name      old user-time/op    new user-time/op    delta
	Build-16          687ms ± 6%          664ms ± 7%    ~     (p=0.548 n=5+5)

Fixes #456.
Updates #475.

											
										
										
											2 years ago
+											return true
 										}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg, err := listPackage(tf.curPkg, path)
-												refactor "current package" with TOOLEXEC_IMPORTPATH (#266)

Now that we've dropped support for Go 1.15.x, we can finally rely on
this environment variable for toolexec calls, present in Go 1.16.

Before, we had hacky ways of trying to figure out the current package's
import path, mostly from the -p flag. The biggest rough edge there was
that, for main packages, that was simply the package name, and not its
full import path.

To work around that, we had a restriction on a single main package, so
we could work around that issue. That restriction is now gone.

The new code is simpler, especially because we can set curPkg in a
single place for all toolexec transform funcs.

Since we can always rely on curPkg not being nil now, we can also start
reusing listedPackage.Private and avoid the majority of repeated calls
to isPrivate. The function is cheap, but still not free.

isPrivate itself can also get simpler. We no longer have to worry about
the "main" edge case. Plus, the sanity check for invalid package paths
is now unnecessary; we only got malformed paths from goobj2, and we now
require exact matches with the ImportPath field from "go list -json".

Another effect of clearing up the "main" edge case is that -debugdir now
uses the right directory for main packages. We also start using
consistent debugdir paths in the tests, for the sake of being easier to
read and maintain.

Finally, note that commandReverse did not need the extra call to "go
list -toolexec", as the "shared" call stored in the cache is enough. We
still call toolexecCmd to get said cache, which should probably be
simplified in a future PR.

While at it, replace the use of the "-std" compiler flag with the
Standard field from "go list -json".
											
										
										
											3 years ago
+										if err != nil {
 											panic(err) // shouldn't happen
 										}
-												deprecate using GOPRIVATE in favor of GOGARBLE (#427)

Piggybacking off of GOPRIVATE is great for a number of reasons:

* People tend to obfuscate private code, whose package paths will
  generally be in GOPRIVATE already

* Its meaning and syntax are well understood

* It allows all the flexibility we need without adding our own env var
  or config option

However, using GOPRIVATE directly has one main drawback.
It's fairly common to also want to obfuscate public dependencies,
to make the code in private packages even harder to follow.
However, using "GOPRIVATE=*" will result in two main downsides:

* GONOPROXY defaults to GOPRIVATE, so the proxy would be entirely disabled.
  Downloading modules, such as when adding or updating dependencies,
  or when the local cache is cold, can be less reliable.

* GONOSUMDB defaults to GOPRIVATE, so the sumdb would be entirely disabled.
  Adding entries to go.sum, such as when adding or updating dependencies,
  can be less secure.

We will continue to consume GOPRIVATE as a fallback,
but we now expect users to set GOGARBLE instead.
The new logic is documented in the README.

While here, rewrite some uses of "private" with "to obfuscate",
to make the code easier to follow and harder to misunderstand.

Fixes #276.
											
										
										
											3 years ago
+										if !lpkg.ToObfuscate {
 											return true // we're not obfuscating this package
-												Fix bug where structs would get garbled in some packages but not in others (#161)

* fix bug where structs would get garbled in some packages but not in others

* only check if struct/field was not defined in current package

* fix a related bug when two objects share the same name in the same package and one is garbled but the other one is not

* renamed parameter for clarity
											
										
										
											4 years ago
+										}
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+										hashToUse := lpkg.GarbleActionID
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+										debugName := "variable"
-												Fix bug where structs would get garbled in some packages but not in others (#161)

* fix bug where structs would get garbled in some packages but not in others

* only check if struct/field was not defined in current package

* fix a related bug when two objects share the same name in the same package and one is garbled but the other one is not

* renamed parameter for clarity
											
										
										
											4 years ago
-												remove a couple of easy TODOs

First, I tried to follow my own past advice to only set GarbleActionID
if ToObfuscate is true. However, that broke at least three parts of
transformCompile, as the hash is used for more than I recalled.
Give up on that idea, because the current code is working as intended.
Better document what GarbleActionID is and what we use it for.

Second, now that https://go.dev/cl/348741 was shipped with Go 1.18,
using the logger when its output is io.Discard is already a no-op.
So we no longer need our debugf wrapper to apply the no-op logic.

											
										
										
											2 years ago
+										// log.Printf("%s: %#v %T", fset.Position(node.Pos()), node, obj)
-												simplify the uses of obfuscatedTypesPackage (#284)

obfuscatedTypesPackage is now only useful for one scenario: a type which
is used with reflection in a dependency, so neither its name nor any of
its potential struct field names are obfuscated.

This is because we can only detect the use of reflection with go/ast,
which we don't have for dependencies.

As such, we only need obfuscatedTypesPackage in two places - when
considering to obfuscate a field or a type name.

There were two other calls to the function, which we remove.

The first was used for linkname directives. Those directives only work
for variables and functions, neither of which is affected by the
reflection detection.

The second was used for all identifiers, at the very end of the
transformGo inner func. It's entirely unnecessary right now, as it never
triggers anymore. It's possible it was necessary some time ago when we
still didn't obfuscate assembly functions.

While at it, improve some comments and add a few TODOs for edge cases
which do not have code coverage.
											
										
										
											3 years ago
+										switch obj := obj.(type) {
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+										case *types.Var:
-												fix a number of issues involving types from indirect imports

obfuscatedTypesPackage is used to figure out if a name in a dependency
package was obfuscated or not. For example, if that package used
reflection on a named type, it wasn't obfuscated, so we must have the
same information to not obfuscate the same name downstream.

obfuscatedTypesPackage could return nil if the package was indirectly
imported, though. This can happen if a direct import has a function that
returns an indirect type, or if a direct import exposes a name that's a
type alias to an indirect type.

We sort of dealt with this in two pieces of code by checking for
obfPkg!=nil, but a third one did not have this check and caused a panic
in the added test case:

	--- FAIL: TestScripts/reflect (0.81s)
	    testscript.go:397:
	        > env GOPRIVATE=test/main
	        > garble build
	        [stderr]
	        # test/main
	        panic: runtime error: invalid memory address or nil pointer dereference [recovered]
	        	panic: runtime error: invalid memory address or nil pointer dereference
	        [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x8a5e39]

More importantly though, the nil check only avoids panics. It doesn't
fix the root cause of the problem: that importcfg does not contain
indirectly imported packages. The added test case would still fail, as
we would obfuscate a type in the main package, but not in the indirectly
imported package where the type is defined.

To fix this, resurrect a bit of code from earlier garble versions, which
uses "go list -toolexec=garble" to fetch a package's export file. This
lets us fill the indirect import gaps in importcfg, working around the
problem entirely.

This solution is still not particularly great, so we add a TODO about
possibly rethinking this in the future. It does add some overhead and
complexity, though thankfully indirect imports should be uncommon.

This fixes a few panics while building the protobuf module.

											
										
										
											3 years ago
+											if !obj.IsField() {
-												obfuscate all variable names, even local ones (#420)

In the added test case, "garble -literals build" would fail:

	--- FAIL: TestScripts/literals (8.29s)
		testscript.go:397:
			> env GOPRIVATE=test/main
			> garble -literals build
			[stderr]
			# test/main
			Usz1FmFm.go:1: cannot call non-function string (type int), declared at Usz1FmFm.go:1
			Usz1FmFm.go:1: string is not a type
			Usz1FmFm.go:1: cannot call non-function append (type int), declared at Usz1FmFm.go:1

That is, for input code such as:

	var append int
	println("foo")
	_ = append

We'd end up with obfuscated code like:

	var append int
	println(func() string {
		// obfuscation...
		x = append(x, ...)
		// obfuscation...
		return string(x)
	})
	_ = append

Which would then break, as the code is shadowing the "append" builtin.
To work around this, always obfuscate variable names, so we end up with:

	var mwu1xuNz int
	println(func() string {
		// obfuscation...
		x = append(x, ...)
		// obfuscation...
		return string(x)
	})
	_ = mwu1xuNz

This change shouldn't make the quality of our obfuscation stronger,
as local variable names do not currently end up in Go binaries.
However, this does make garble more consistent in treating identifiers,
and it completely avoids any issues related to shadowing builtins.

Moreover, this also paves the way for publishing obfuscated source code,
such as #369.

Fixes #417.
											
										
										
											3 years ago
+												// Identifiers denoting variables are always obfuscated.
-												fix a number of issues involving types from indirect imports

obfuscatedTypesPackage is used to figure out if a name in a dependency
package was obfuscated or not. For example, if that package used
reflection on a named type, it wasn't obfuscated, so we must have the
same information to not obfuscate the same name downstream.

obfuscatedTypesPackage could return nil if the package was indirectly
imported, though. This can happen if a direct import has a function that
returns an indirect type, or if a direct import exposes a name that's a
type alias to an indirect type.

We sort of dealt with this in two pieces of code by checking for
obfPkg!=nil, but a third one did not have this check and caused a panic
in the added test case:

	--- FAIL: TestScripts/reflect (0.81s)
	    testscript.go:397:
	        > env GOPRIVATE=test/main
	        > garble build
	        [stderr]
	        # test/main
	        panic: runtime error: invalid memory address or nil pointer dereference [recovered]
	        	panic: runtime error: invalid memory address or nil pointer dereference
	        [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x8a5e39]

More importantly though, the nil check only avoids panics. It doesn't
fix the root cause of the problem: that importcfg does not contain
indirectly imported packages. The added test case would still fail, as
we would obfuscate a type in the main package, but not in the indirectly
imported package where the type is defined.

To fix this, resurrect a bit of code from earlier garble versions, which
uses "go list -toolexec=garble" to fetch a package's export file. This
lets us fill the indirect import gaps in importcfg, working around the
problem entirely.

This solution is still not particularly great, so we add a TODO about
possibly rethinking this in the future. It does add some overhead and
complexity, though thankfully indirect imports should be uncommon.

This fixes a few panics while building the protobuf module.

											
										
										
											3 years ago
+												break
 											}
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+											debugName = "field"
-												fix a number of issues involving types from indirect imports

obfuscatedTypesPackage is used to figure out if a name in a dependency
package was obfuscated or not. For example, if that package used
reflection on a named type, it wasn't obfuscated, so we must have the
same information to not obfuscate the same name downstream.

obfuscatedTypesPackage could return nil if the package was indirectly
imported, though. This can happen if a direct import has a function that
returns an indirect type, or if a direct import exposes a name that's a
type alias to an indirect type.

We sort of dealt with this in two pieces of code by checking for
obfPkg!=nil, but a third one did not have this check and caused a panic
in the added test case:

	--- FAIL: TestScripts/reflect (0.81s)
	    testscript.go:397:
	        > env GOPRIVATE=test/main
	        > garble build
	        [stderr]
	        # test/main
	        panic: runtime error: invalid memory address or nil pointer dereference [recovered]
	        	panic: runtime error: invalid memory address or nil pointer dereference
	        [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x8a5e39]

More importantly though, the nil check only avoids panics. It doesn't
fix the root cause of the problem: that importcfg does not contain
indirectly imported packages. The added test case would still fail, as
we would obfuscate a type in the main package, but not in the indirectly
imported package where the type is defined.

To fix this, resurrect a bit of code from earlier garble versions, which
uses "go list -toolexec=garble" to fetch a package's export file. This
lets us fill the indirect import gaps in importcfg, working around the
problem entirely.

This solution is still not particularly great, so we add a TODO about
possibly rethinking this in the future. It does add some overhead and
complexity, though thankfully indirect imports should be uncommon.

This fixes a few panics while building the protobuf module.

											
										
										
											3 years ago
+											// From this point on, we deal with struct fields.
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+											// Fields don't get hashed with the package's action ID.
 											// They get hashed with the type of their parent struct.
 											// This is because one struct can be converted to another,
 											// as long as the underlying types are identical,
 											// even if the structs are defined in different packages.
 											//
 											// TODO: Consider only doing this for structs where all
 											// fields are exported. We only need this special case
 											// for cross-package conversions, which can't work if
 											// any field is unexported. If that is done, add a test
 											// that ensures unexported fields from different
 											// packages result in different obfuscated names.
-												fix a number of issues involving types from indirect imports

obfuscatedTypesPackage is used to figure out if a name in a dependency
package was obfuscated or not. For example, if that package used
reflection on a named type, it wasn't obfuscated, so we must have the
same information to not obfuscate the same name downstream.

obfuscatedTypesPackage could return nil if the package was indirectly
imported, though. This can happen if a direct import has a function that
returns an indirect type, or if a direct import exposes a name that's a
type alias to an indirect type.

We sort of dealt with this in two pieces of code by checking for
obfPkg!=nil, but a third one did not have this check and caused a panic
in the added test case:

	--- FAIL: TestScripts/reflect (0.81s)
	    testscript.go:397:
	        > env GOPRIVATE=test/main
	        > garble build
	        [stderr]
	        # test/main
	        panic: runtime error: invalid memory address or nil pointer dereference [recovered]
	        	panic: runtime error: invalid memory address or nil pointer dereference
	        [signal SIGSEGV: segmentation violation code=0x1 addr=0x20 pc=0x8a5e39]

More importantly though, the nil check only avoids panics. It doesn't
fix the root cause of the problem: that importcfg does not contain
indirectly imported packages. The added test case would still fail, as
we would obfuscate a type in the main package, but not in the indirectly
imported package where the type is defined.

To fix this, resurrect a bit of code from earlier garble versions, which
uses "go list -toolexec=garble" to fetch a package's export file. This
lets us fill the indirect import gaps in importcfg, working around the
problem entirely.

This solution is still not particularly great, so we add a TODO about
possibly rethinking this in the future. It does add some overhead and
complexity, though thankfully indirect imports should be uncommon.

This fixes a few panics while building the protobuf module.

											
										
										
											3 years ago
+											strct := tf.fieldToStruct[obj]
 											if strct == nil {
-												split typecheck and loadPkgCache from transformer

Neither of them has anything to do with transforming Go code;
they simply load or compute the information necessary for doing so.

Split typecheck into two functions as well.
The new typecheck function only does typechecking and nothing else.
A new comptueFieldToStruct func fills the fieldToStruct map,
which depends on typecheck, but is not needed when computing pkgCache.

This isolation also forces us to separate the code that fills pkgCache
from the code that fills the in-memory-only maps in transformer,
removing the need for the NOTE that we had left around as a reminder.

											
										
										
											1 year ago
+												panic("could not find struct for field " + name)
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
+											}
-												strip struct tags when hashing structs for type identity

This was a long standing TODO, and a user finally ran into it.
The fix isn't terribly straightforward, but I'm pretty happy with it.

Add a test case where the same struct field is identical
with no tag, with the "tagged1" json tag, and again with "tagged2".
While here, we add a test case for a regular named field too.

Fixes #801.

											
										
										
											7 months ago
+											node.Name = hashWithStruct(strct, obj)
-												avoid `...any` allocs in debug logs in hot loops

These lines get executed for every identifier in every package in each
Go build, so one allocation per log.Printf call can quickly add up to
millions of allocations across a build.

Until https://go.dev/issue/53465 is fixed, the best way to avoid the
escaping due to `...any` is to not perform the function call at all.

	name      old time/op         new time/op         delta
	Build-16          10.5s ± 1%          10.5s ± 2%    ~     (p=0.604 n=9+10)

	name      old bin-B           new bin-B           delta
	Build-16          5.52M ± 0%          5.52M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          506ms ±13%          500ms ± 7%    ~     (p=0.739 n=10+10)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          31.7M ± 0%          30.1M ± 0%  -5.33%  (p=0.000 n=10+9)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.70s ± 5%          5.78s ± 6%    ~     (p=0.278 n=9+10)

											
										
										
											2 years ago
+											if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
 												log.Printf("%s %q hashed with struct fields to %q", debugName, name, node.Name)
 											}
-												make obfuscation fully deterministic with -seed

The default behavior of garble is to seed via the build inputs,
including the build IDs of the entire Go build of each package.
This works well as a default, and does give us determinism,
but it means that building for different platforms
will result in different obfuscation per platform.

Instead, when -seed is provided, don't use any other hash seed or salt.
This means that a particular Go name will be obfuscated the same way
as long as the seed, package path, and name itself remain constant.

In other words, when the user supplies a custom -seed,
we assume they know what they're doing in terms of storage and rotation.

Expand the README docs with more examples and detail.

Fixes #449.

											
										
										
											2 years ago
+											return true
-												hash field names equally in all packages

Packages P1 and P2 can define identical struct types T1 and T2, and one
can convert from type T1 to T2 or vice versa.

The spec defines two identical struct types as:

	Two struct types are identical if they have the same sequence of
	fields, and if corresponding fields have the same names, and
	identical types, and identical tags. Non-exported field names
	from different packages are always different.

Unfortunately, garble broke this: since we obfuscated field names
differently depending on the package, cross-package conversions like the
case above would result in typechecking errors.

To fix this, implement Joe Tsai's idea: hash struct field names with the
string representation of the entire struct. This way, identical struct
types will have their field names obfuscated in the same way in all
packages across a build.

Note that we had to refactor "reverse" a bit to start using transformer,
since now it needs to keep track of struct types as well.

This failure was affecting the build of google.golang.org/protobuf,
since it makes regular use of cross-package struct conversions.

Note that the protobuf module still fails to build, but for other
reasons. The package that used to fail now succeeds, so the build gets a
bit further than before. #240 tracks adding relevant third-party Go
modules to CI, so we'll track the other remaining failures there.

Fixes #310.

											
										
										
											3 years ago
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+										case *types.TypeName:
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+											debugName = "type"
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+										case *types.Func:
-												avoid breaking intrinsics when obfuscating names

We obfuscate import paths as well as their declared names.
The compiler treats some packages and APIs in special ways,
and the way it detects those is by looking at import paths and names.

In the past, we have avoided obfuscating some names like embed.FS or
reflect.Value.MethodByName for this reason. Otherwise,
go:embed or the linker's deadcode elimination might be broken.

This matching by path and name also happens with compiler intrinsics.
Intrinsics allow the compiler to rewrite some standard library calls
with small and efficient assembly, depending on the target GOARCH.
For example, math/bits.TrailingZeros32 gets replaced with ssa.OpCtz32,
which on amd64 may result in using the TZCNTL instruction.

We never noticed that we were breaking many of these intrinsics.
The intrinsics for funcs declared in the runtime and its dependencies
still worked properly, as we do not obfuscate those packages yet.
However, for other packages like math/bits and sync/atomic,
the intrinsics were being entirely disabled due to obfuscated names.

Skipping intrinsics is particularly bad for performance,
and it also leads to slightly larger binaries:

			 │      old      │                 new                 │
			 │     bin-B     │     bin-B      vs base              │
	Build-16   5.450Mi ± ∞ ¹   5.333Mi ± ∞ ¹  -2.15% (p=0.029 n=4)

Finally, the main reason we noticed that intrinsics were broken
is that apparently GOARCH=mips fails to link without them,
as some symbols end up being not defined at all.
This patch fixes builds for the MIPS family of architectures.

Rather than building and linking all of std for every GOARCH,
test that intrinsics work by asking the compiler to print which
intrinsics are being applied, and checking that math/bits gets them.

This fix is relatively unfortunate, as it means we stop obfuscating
about 120 function names and a handful of package paths.
However, fixing builds and intrinsics is much more important.
We can figure out better ways to deal with intrinsics in the future.

Fixes #646.

											
										
										
											1 year ago
+											if compilerIntrinsicsFuncs[path+"."+name] {
 												return true
 											}
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+											sign := obj.Type().(*types.Signature)
-												add -debug to aid in debugging (#431)

Not really meant for end users,
but they might still debug failures before filing bugs.

We add the -debug flag itself,
as well as machinery to deduplicate output lines.
There are quite a lot of them otherwise,
which aren't helpful and simply add noise.

In the future, if we always want to output a debug log line,
such as "choosing not to obfuscate here because X",
we can simply insert the unique position string.

Finally, turn all commented-out log.Printf calls to debugf.
Improve a few log lines to be more human-friendly,
and also add a few extras like how long it takes to load files.

We can improve the logging further in the future.
This seems like a good starting point.
											
										
										
											2 years ago
+											if sign.Recv() == nil {
 												debugName = "func"
 											} else {
 												debugName = "method"
 											}
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+											if obj.Exported() && sign.Recv() != nil {
 												return true // might implement an interface
-												initial support for cgo

I'm sure that the added test case doesn't cover many edge cases, but
it's a start.

Fixes #12.

											
										
										
											4 years ago
+											}
-												implement TODO to use a name variable

Slightly simplifies the main chunk of code.
While here, improve the wording for when Go isn't installed correctly.

											
										
										
											2 years ago
+											switch name {
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+											case "main", "init", "TestMain":
 												return true // don't break them
 											}
-												implement TODO to use a name variable

Slightly simplifies the main chunk of code.
While here, improve the wording for when Go isn't installed correctly.

											
										
										
											2 years ago
+											if strings.HasPrefix(name, "Test") && isTestSignature(sign) {
-												clean up the function that walks the syntax tree

Avoiding a type switch for the entire node prevents an indentation
level.

We can obtain obj and pkg early, and return early as well if either is
uninteresting. That means less nil checks later on, which means even
less indentation and complexity.

											
										
										
											4 years ago
+												return true // don't break tests
 											}
 										default:
 											return true // we only want to rename the above
 										}
-												fix garbling names belonging to indirect imports (#203)

main.go includes a lengthy comment that documents this edge case, why it
happened, and how we are fixing it. To summarize, we should no longer
error with a build error in those cases. Read the comment for details.

A few other minor changes were done to allow writing this patch.

First, the actionID and contentID funcs were renamed, since they started
to collide with variable names.

Second, the logging has been improved a bit, which allowed me to debug
the issue.

Third, the "cache" global shared by all garble sub-processes now
includes the necessary parameters to run "go list -toolexec", including
the path to garble and the build flags being used.

Thanks to lu4p for writing a test case, which also applied gofmt to that
testdata Go file.

Fixes #180.
Closes #181, since it includes its test case.
											
										
										
											3 years ago
-												make obfuscation fully deterministic with -seed

The default behavior of garble is to seed via the build inputs,
including the build IDs of the entire Go build of each package.
This works well as a default, and does give us determinism,
but it means that building for different platforms
will result in different obfuscation per platform.

Instead, when -seed is provided, don't use any other hash seed or salt.
This means that a particular Go name will be obfuscated the same way
as long as the seed, package path, and name itself remain constant.

In other words, when the user supplies a custom -seed,
we assume they know what they're doing in terms of storage and rotation.

Expand the README docs with more examples and detail.

Fixes #449.

											
										
										
											2 years ago
+										node.Name = hashWithPackage(lpkg, name)
 										// TODO: probably move the debugf lines inside the hash funcs
-												avoid `...any` allocs in debug logs in hot loops

These lines get executed for every identifier in every package in each
Go build, so one allocation per log.Printf call can quickly add up to
millions of allocations across a build.

Until https://go.dev/issue/53465 is fixed, the best way to avoid the
escaping due to `...any` is to not perform the function call at all.

	name      old time/op         new time/op         delta
	Build-16          10.5s ± 1%          10.5s ± 2%    ~     (p=0.604 n=9+10)

	name      old bin-B           new bin-B           delta
	Build-16          5.52M ± 0%          5.52M ± 0%    ~     (all equal)

	name      old cached-time/op  new cached-time/op  delta
	Build-16          506ms ±13%          500ms ± 7%    ~     (p=0.739 n=10+10)

	name      old mallocs/op      new mallocs/op      delta
	Build-16          31.7M ± 0%          30.1M ± 0%  -5.33%  (p=0.000 n=10+9)

	name      old sys-time/op     new sys-time/op     delta
	Build-16          5.70s ± 5%          5.78s ± 6%    ~     (p=0.278 n=9+10)

											
										
										
											2 years ago
+										if flagDebug { // TODO(mvdan): remove once https://go.dev/issue/53465 if fixed
 											log.Printf("%s %q hashed with %x… to %q", debugName, name, hashToUse[:4], node.Name)
 										}
-												start hashing identifiers

											
										
										
											5 years ago
+										return true
-												support type switches with symbolic vars

											
										
										
											5 years ago
+									}
-												join the import-rewrite ast.Inspect with transformGo

Walking the entire Go file again seems unnecessary, when we have an
astutil.Apply just next to it.

We could add to its "pre" function, but it's a bit easier to use the
"post" instead, which is empty.

Another advantage here is that astutil.Apply is more powerful than
ast.Inspect, which can come in handy in the future.

											
										
										
											3 years ago
+									post := func(cursor *astutil.Cursor) bool {
 										imp, ok := cursor.Node().(*ast.ImportSpec)
 										if !ok {
 											return true
 										}
 										path, err := strconv.Unquote(imp.Path.Value)
 										if err != nil {
 											panic(err) // should never happen
 										}
 										// We're importing an obfuscated package.
 										// Replace the import path with its obfuscated version.
 										// If the import was unnamed, give it the name of the
 										// original package name, to keep references working.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg, err := listPackage(tf.curPkg, path)
-												join the import-rewrite ast.Inspect with transformGo

Walking the entire Go file again seems unnecessary, when we have an
astutil.Apply just next to it.

We could add to its "pre" function, but it's a bit easier to use the
"post" instead, which is empty.

Another advantage here is that astutil.Apply is more powerful than
ast.Inspect, which can come in handy in the future.

											
										
										
											3 years ago
+										if err != nil {
 											panic(err) // should never happen
 										}
-												deprecate using GOPRIVATE in favor of GOGARBLE (#427)

Piggybacking off of GOPRIVATE is great for a number of reasons:

* People tend to obfuscate private code, whose package paths will
  generally be in GOPRIVATE already

* Its meaning and syntax are well understood

* It allows all the flexibility we need without adding our own env var
  or config option

However, using GOPRIVATE directly has one main drawback.
It's fairly common to also want to obfuscate public dependencies,
to make the code in private packages even harder to follow.
However, using "GOPRIVATE=*" will result in two main downsides:

* GONOPROXY defaults to GOPRIVATE, so the proxy would be entirely disabled.
  Downloading modules, such as when adding or updating dependencies,
  or when the local cache is cold, can be less reliable.

* GONOSUMDB defaults to GOPRIVATE, so the sumdb would be entirely disabled.
  Adding entries to go.sum, such as when adding or updating dependencies,
  can be less secure.

We will continue to consume GOPRIVATE as a fallback,
but we now expect users to set GOGARBLE instead.
The new logic is documented in the README.

While here, rewrite some uses of "private" with "to obfuscate",
to make the code easier to follow and harder to misunderstand.

Fixes #276.
											
										
										
											3 years ago
+										if !lpkg.ToObfuscate {
-												join the import-rewrite ast.Inspect with transformGo

Walking the entire Go file again seems unnecessary, when we have an
astutil.Apply just next to it.

We could add to its "pre" function, but it's a bit easier to use the
"post" instead, which is empty.

Another advantage here is that astutil.Apply is more powerful than
ast.Inspect, which can come in handy in the future.

											
										
										
											3 years ago
+											return true
 										}
-												support `garble test` in main packages

A main package can be imported in one edge case we weren't testing:
when the same main package has any tests, which implicitly import main.

Before the fix, we would panic:

        > garble test -v ./...
        # test/bar/somemaintest.test
        panic: main packages should never need to obfuscate their import paths

											
										
										
											2 years ago
+										if lpkg.Name != "main" {
 											newPath := lpkg.obfuscatedImportPath()
 											imp.Path.Value = strconv.Quote(newPath)
 										}
-												join the import-rewrite ast.Inspect with transformGo

Walking the entire Go file again seems unnecessary, when we have an
astutil.Apply just next to it.

We could add to its "pre" function, but it's a bit easier to use the
"post" instead, which is empty.

Another advantage here is that astutil.Apply is more powerful than
ast.Inspect, which can come in handy in the future.

											
										
										
											3 years ago
+										if imp.Name == nil {
-												avoid go/printer from breaking imports

We obfuscate import paths in import declarations like:

	"domain.com/somepkg"

by replacing them with the obfuscated package path:

	somepkg "HPS4Mskq"

Note how we add a name to the import if there wasn't one,
so that references like somepkg.Foo keep working in the code.

This could break in some edge cases involving comments between imports
in the Go code, because go/printer is somewhat brittle with positions:

	> garble build -tags buildtag
	[stderr]
	# test/main/importedpkg
	:16: syntax error: missing import path
	exit status 2
	exit status 2

To prevent that, ensure the name has a reasonable position.

This was preventing github.com/gorilla/websocket from being obufscated.
It is a fairly popular library in Go, but we don't add it to
scripts/check-third-party.sh for now as wireguard already gives us
coverage over networking and cryptography.

											
										
										
											2 years ago
+											imp.Name = &ast.Ident{
 												NamePos: imp.Path.ValuePos, // ensure it ends up on the same line
 												Name:    lpkg.Name,
 											}
-												join the import-rewrite ast.Inspect with transformGo

Walking the entire Go file again seems unnecessary, when we have an
astutil.Apply just next to it.

We could add to its "pre" function, but it's a bit easier to use the
"post" instead, which is empty.

Another advantage here is that astutil.Apply is more powerful than
ast.Inspect, which can come in handy in the future.

											
										
										
											3 years ago
+										}
 										return true
 									}
 									return astutil.Apply(file, pre, post).(*ast.File)
-												start hashing identifiers

											
										
										
											5 years ago
+								}
-												blacklist struct fields with reflection too

In the added test, the unexported field used to be garbled.

Reflection can only reach exported methods, exported fields, and
unexported fields. Exported methods and fields are currently never
garbled, so unexported fields was the only missing piece.

											
										
										
											4 years ago
+								// named tries to obtain the *types.Named behind a type, if there is one.
 								// This is useful to obtain "testing.T" from "*testing.T", or to obtain the type
 								// declaration object from an embedded field.
 								func namedType(t types.Type) *types.Named {
-												don't panic with struct pointer anonymous fields

While at it, make the "object of type" code shared and more robust.

											
										
										
											5 years ago
+									switch t := t.(type) {
 									case *types.Named:
-												blacklist struct fields with reflection too

In the added test, the unexported field used to be garbled.

Reflection can only reach exported methods, exported fields, and
unexported fields. Exported methods and fields are currently never
garbled, so unexported fields was the only missing piece.

											
										
										
											4 years ago
+										return t
-												don't panic with struct pointer anonymous fields

While at it, make the "object of type" code shared and more robust.

											
										
										
											5 years ago
+									case interface{ Elem() types.Type }:
-												blacklist struct fields with reflection too

In the added test, the unexported field used to be garbled.

Reflection can only reach exported methods, exported fields, and
unexported fields. Exported methods and fields are currently never
garbled, so unexported fields was the only missing piece.

											
										
										
											4 years ago
+										return namedType(t.Elem())
-												don't panic with struct pointer anonymous fields

While at it, make the "object of type" code shared and more robust.

											
										
										
											5 years ago
+									default:
 										return nil
 									}
 								}
-												add initial support for running tests

For now, it mainly consists of not garbling Test* funcs, and not
garbling the _testmain.go file that will run them.

Updates #6.

											
										
										
											5 years ago
+								// isTestSignature returns true if the signature matches "func _(*testing.T)".
 								func isTestSignature(sign *types.Signature) bool {
 									if sign.Recv() != nil {
-												avoid panic on funcs that almost look like tests (#235)

The added test case used to crash garble:

	panic: runtime error: invalid memory address or nil pointer dereference [recovered]
		panic: runtime error: invalid memory address or nil pointer dereference
	[signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x8fe71b]

	goroutine 1 [running]:
	golang.org/x/tools/go/ast/astutil.Apply.func1(0xc0001e8880, 0xc000221570)
		/go/pkg/mod/golang.org/x/tools@v0.0.0-20210115202250-e0d201561e39/go/ast/astutil/rewrite.go:47 +0x97
	panic(0x975bc0, 0xd6c610)
		/sdk/go1.15.8/src/runtime/panic.go:969 +0x1b9
	go/types.(*Named).Obj(...)
		/sdk/go1.15.8/src/go/types/type.go:473
	mvdan.cc/garble.isTestSignature(0xc0001e7080, 0xa02e84)
		/src/garble/main.go:1170 +0x7b
	mvdan.cc/garble.(*transformer).transformGo.func2(0xc000122df0, 0xaac301)
		/src/garble/main.go:1028 +0xff1

We were assuming that the first parameter was a named type, but that
might not be the case.

This crash was found out in the wild, from which a minimal repro was
written. We add two variants of it to the test data, just in case.
											
										
										
											3 years ago
+										return false // test funcs don't have receivers
-												add initial support for running tests

For now, it mainly consists of not garbling Test* funcs, and not
garbling the _testmain.go file that will run them.

Updates #6.

											
										
										
											5 years ago
+									}
 									params := sign.Params()
 									if params.Len() != 1 {
-												avoid panic on funcs that almost look like tests (#235)

The added test case used to crash garble:

	panic: runtime error: invalid memory address or nil pointer dereference [recovered]
		panic: runtime error: invalid memory address or nil pointer dereference
	[signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x8fe71b]

	goroutine 1 [running]:
	golang.org/x/tools/go/ast/astutil.Apply.func1(0xc0001e8880, 0xc000221570)
		/go/pkg/mod/golang.org/x/tools@v0.0.0-20210115202250-e0d201561e39/go/ast/astutil/rewrite.go:47 +0x97
	panic(0x975bc0, 0xd6c610)
		/sdk/go1.15.8/src/runtime/panic.go:969 +0x1b9
	go/types.(*Named).Obj(...)
		/sdk/go1.15.8/src/go/types/type.go:473
	mvdan.cc/garble.isTestSignature(0xc0001e7080, 0xa02e84)
		/src/garble/main.go:1170 +0x7b
	mvdan.cc/garble.(*transformer).transformGo.func2(0xc000122df0, 0xaac301)
		/src/garble/main.go:1028 +0xff1

We were assuming that the first parameter was a named type, but that
might not be the case.

This crash was found out in the wild, from which a minimal repro was
written. We add two variants of it to the test data, just in case.
											
										
										
											3 years ago
+										return false // too many parameters for a test func
 									}
 									named := namedType(params.At(0).Type())
 									if named == nil {
 										return false // the only parameter isn't named, like "string"
-												add initial support for running tests

For now, it mainly consists of not garbling Test* funcs, and not
garbling the _testmain.go file that will run them.

Updates #6.

											
										
										
											5 years ago
+									}
-												avoid panic on funcs that almost look like tests (#235)

The added test case used to crash garble:

	panic: runtime error: invalid memory address or nil pointer dereference [recovered]
		panic: runtime error: invalid memory address or nil pointer dereference
	[signal SIGSEGV: segmentation violation code=0x1 addr=0x8 pc=0x8fe71b]

	goroutine 1 [running]:
	golang.org/x/tools/go/ast/astutil.Apply.func1(0xc0001e8880, 0xc000221570)
		/go/pkg/mod/golang.org/x/tools@v0.0.0-20210115202250-e0d201561e39/go/ast/astutil/rewrite.go:47 +0x97
	panic(0x975bc0, 0xd6c610)
		/sdk/go1.15.8/src/runtime/panic.go:969 +0x1b9
	go/types.(*Named).Obj(...)
		/sdk/go1.15.8/src/go/types/type.go:473
	mvdan.cc/garble.isTestSignature(0xc0001e7080, 0xa02e84)
		/src/garble/main.go:1170 +0x7b
	mvdan.cc/garble.(*transformer).transformGo.func2(0xc000122df0, 0xaac301)
		/src/garble/main.go:1028 +0xff1

We were assuming that the first parameter was a named type, but that
might not be the case.

This crash was found out in the wild, from which a minimal repro was
written. We add two variants of it to the test data, just in case.
											
										
										
											3 years ago
+									obj := named.Obj()
-												don't panic with struct pointer anonymous fields

While at it, make the "object of type" code shared and more robust.

											
										
										
											5 years ago
+									return obj != nil && obj.Pkg().Path() == "testing" && obj.Name() == "T"
-												add initial support for running tests

For now, it mainly consists of not garbling Test* funcs, and not
garbling the _testmain.go file that will run them.

Updates #6.

											
										
										
											5 years ago
+								}
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+								func (tf *transformer) transformLink(args []string) ([]string, error) {
-												initial support for build caching (#142)

As per the discussion in https://github.com/golang/go/issues/41145, it
turns out that we don't need special support for build caching in
-toolexec. We can simply modify the behavior of "[...]/compile -V=full"
and "[...]/link -V=full" so that they include garble's own version and
options in the printed build ID.

The part of the build ID that matters is the last, since it's the
"content ID" which is used to work out whether there is a need to redo
the action (build) or not. Since cmd/go parses the last word in the
output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}".
The slashes let us imitate a full binary build ID, but we assume that
the other components such as the action ID are not necessary, since the
only reader here is cmd/go and it only consumes the content ID.

The reported content ID includes the tool's original content ID,
garble's own content ID from the built binary, and the garble options
which modify how we obfuscate code. If any of the three changes, we
should use a different build cache key. GOPRIVATE also affects caching,
since a different GOPRIVATE value means that we might have to garble a
different set of packages.

Include tests, which mainly check that 'garble build -v' prints package
lines when we expect to always need to rebuild packages, and that it
prints nothing when we should be reusing the build cache even when the
built binary is missing.

After this change, 'go test' on Go 1.15.2 stabilizes at about 8s on my
machine, whereas it used to be at around 25s before.
											
										
										
											4 years ago
+									// We can't split by the ".a" extension, because cached object files
 									// lack any extension.
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+									flags, args := splitFlagsFromArgs(args)
-												Garble imports and package paths in GOPRIVATE (#116)

Finally, finally this is done. This allows import paths to be obfuscated by modifying
object/archive files and garbling import paths contained within. The bulk of the
code that makes parsing and writing Go object/archive files possible lives at
https://github.com/Binject/debug/tree/master/goobj2, which I wrote as well.

I have tested by garbling and checking for import paths via strings and grep
(in order of difficulty) https://github.com/lu4p/binclude, garble itself, and
https://github.com/dominikh/go-tools/tree/master/cmd/staticcheck.

This only supports object/archive files produced from the Go 1.15 compiler.
The object file format changed at 1.15, and 1.14 and earlier is not supported.

Fixes #13.
											
										
										
											4 years ago
-												add control flow obfuscation

Implemented control flow flattening with additional features such as block splitting and junk jumps
											
										
										
											11 months ago
+									newImportCfg, err := tf.processImportCfg(flags, nil)
-												Garble imports and package paths in GOPRIVATE (#116)

Finally, finally this is done. This allows import paths to be obfuscated by modifying
object/archive files and garbling import paths contained within. The bulk of the
code that makes parsing and writing Go object/archive files possible lives at
https://github.com/Binject/debug/tree/master/goobj2, which I wrote as well.

I have tested by garbling and checking for import paths via strings and grep
(in order of difficulty) https://github.com/lu4p/binclude, garble itself, and
https://github.com/dominikh/go-tools/tree/master/cmd/staticcheck.

This only supports object/archive files produced from the Go 1.15 compiler.
The object file format changed at 1.15, and 1.14 and earlier is not supported.

Fixes #13.
											
										
										
											4 years ago
+									if err != nil {
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										return nil, err
-												Garble imports and package paths in GOPRIVATE (#116)

Finally, finally this is done. This allows import paths to be obfuscated by modifying
object/archive files and garbling import paths contained within. The bulk of the
code that makes parsing and writing Go object/archive files possible lives at
https://github.com/Binject/debug/tree/master/goobj2, which I wrote as well.

I have tested by garbling and checking for import paths via strings and grep
(in order of difficulty) https://github.com/lu4p/binclude, garble itself, and
https://github.com/dominikh/go-tools/tree/master/cmd/staticcheck.

This only supports object/archive files produced from the Go 1.15 compiler.
The object file format changed at 1.15, and 1.14 and earlier is not supported.

Fixes #13.
											
										
										
											4 years ago
+									}
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+									// TODO: unify this logic with the -X handling when using -literals.
 									// We should be able to handle both cases via the syntax tree.
 									//
-												use "obfuscate" instead of "garble" in some more places

Mainly comments. "garble" refers to the tool, but the verb and adjective
is more intuitive as "obfuscate" and "obfuscated" instead of "garble"
and "garbled".

											
										
										
											3 years ago
+									// Make sure -X works with obfuscated identifiers.
 									// To cover both obfuscated and non-obfuscated names,
 									// duplicate each flag with a obfuscated version.
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+									flagValueIter(flags, "-X", func(val string) {
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										// val is in the form of "foo.com/bar.name=value".
 										fullName, stringValue, found := strings.Cut(val, "=")
 										if !found {
 											return // invalid
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+										}
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
 										// fullName is "foo.com/bar.name"
 										i := strings.LastIndexByte(fullName, '.')
 										path, name := fullName[:i], fullName[i+1:]
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
-												refactor "current package" with TOOLEXEC_IMPORTPATH (#266)

Now that we've dropped support for Go 1.15.x, we can finally rely on
this environment variable for toolexec calls, present in Go 1.16.

Before, we had hacky ways of trying to figure out the current package's
import path, mostly from the -p flag. The biggest rough edge there was
that, for main packages, that was simply the package name, and not its
full import path.

To work around that, we had a restriction on a single main package, so
we could work around that issue. That restriction is now gone.

The new code is simpler, especially because we can set curPkg in a
single place for all toolexec transform funcs.

Since we can always rely on curPkg not being nil now, we can also start
reusing listedPackage.Private and avoid the majority of repeated calls
to isPrivate. The function is cheap, but still not free.

isPrivate itself can also get simpler. We no longer have to worry about
the "main" edge case. Plus, the sanity check for invalid package paths
is now unnecessary; we only got malformed paths from goobj2, and we now
require exact matches with the ImportPath field from "go list -json".

Another effect of clearing up the "main" edge case is that -debugdir now
uses the right directory for main packages. We also start using
consistent debugdir paths in the tests, for the sake of being easier to
read and maintain.

Finally, note that commandReverse did not need the extra call to "go
list -toolexec", as the "shared" call stored in the cache is enough. We
still call toolexecCmd to get said cache, which should probably be
simplified in a future PR.

While at it, replace the use of the "-std" compiler flag with the
Standard field from "go list -json".
											
										
										
											3 years ago
+										// If the package path is "main", it's the current top-level
 										// package we are linking.
 										// Otherwise, find it in the cache.
-												move curPkg and origImporter out of the globals

It is true that each garble process only obfuscates up to one package,
which is why we made them globals to begin with.
However, garble does quite a lot more now,
such as reversing the obfuscation of many packages at once.
Having a global "current package" variable makes mistakes easier.

Some funcs, like those in transformFuncs, are now transformer methods.

											
										
										
											1 year ago
+										lpkg := tf.curPkg
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										if path != "main" {
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+											lpkg = sharedCache.ListedPackages[path]
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+										}
-												ignore -ldflags=-X flags mentioning unknown packages

That would panic, since the *listedPackage would be nil for a package
path we aren't aware of:

	panic: runtime error: invalid memory address or nil pointer dereference
	[signal SIGSEGV: segmentation violation code=0x1 addr=0x88 pc=0x126b57d]

	goroutine 1 [running]:
	main.transformLink.func1(0x7ffeefbff28b, 0x5d)
		mvdan.cc/garble@v0.0.0-20210302140807-b03cd08c0946/main.go:1260 +0x17d
	main.flagValueIter(0xc0000a8e20, 0x2f, 0x2f, 0x12e278e, 0x2, 0xc000129e28)
		mvdan.cc/garble@v0.0.0-20210302140807-b03cd08c0946/main.go:1410 +0x1e9
	main.transformLink(0xc0000a8e20, 0x30, 0x36, 0x4, 0xc000114648, 0x23, 0x12dfd60, 0x0)
		mvdan.cc/garble@v0.0.0-20210302140807-b03cd08c0946/main.go:1241 +0x1b9
	main.mainErr(0xc0000a8e10, 0x31, 0x37, 0x37, 0x0)
		mvdan.cc/garble@v0.0.0-20210302140807-b03cd08c0946/main.go:287 +0x389
	main.main1(0xc000096058)
		mvdan.cc/garble@v0.0.0-20210302140807-b03cd08c0946/main.go:150 +0xe7
	main.main()
		mvdan.cc/garble@v0.0.0-20210302140807-b03cd08c0946/main.go:83 +0x25

The linker ignores such unknown references, so we should too.

Fixes #259.

											
										
										
											3 years ago
+										if lpkg == nil {
 											// We couldn't find the package.
 											// Perhaps a typo, perhaps not part of the build.
 											// cmd/link ignores those, so we should too.
 											return
 										}
-												refactor "current package" with TOOLEXEC_IMPORTPATH (#266)

Now that we've dropped support for Go 1.15.x, we can finally rely on
this environment variable for toolexec calls, present in Go 1.16.

Before, we had hacky ways of trying to figure out the current package's
import path, mostly from the -p flag. The biggest rough edge there was
that, for main packages, that was simply the package name, and not its
full import path.

To work around that, we had a restriction on a single main package, so
we could work around that issue. That restriction is now gone.

The new code is simpler, especially because we can set curPkg in a
single place for all toolexec transform funcs.

Since we can always rely on curPkg not being nil now, we can also start
reusing listedPackage.Private and avoid the majority of repeated calls
to isPrivate. The function is cheap, but still not free.

isPrivate itself can also get simpler. We no longer have to worry about
the "main" edge case. Plus, the sanity check for invalid package paths
is now unnecessary; we only got malformed paths from goobj2, and we now
require exact matches with the ImportPath field from "go list -json".

Another effect of clearing up the "main" edge case is that -debugdir now
uses the right directory for main packages. We also start using
consistent debugdir paths in the tests, for the sake of being easier to
read and maintain.

Finally, note that commandReverse did not need the extra call to "go
list -toolexec", as the "shared" call stored in the cache is enough. We
still call toolexecCmd to get said cache, which should probably be
simplified in a future PR.

While at it, replace the use of the "-std" compiler flag with the
Standard field from "go list -json".
											
										
										
											3 years ago
+										// As before, the main package must remain as "main".
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										newPath := path
 										if path != "main" {
 											newPath = lpkg.obfuscatedImportPath()
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
+										}
-												make obfuscation fully deterministic with -seed

The default behavior of garble is to seed via the build inputs,
including the build IDs of the entire Go build of each package.
This works well as a default, and does give us determinism,
but it means that building for different platforms
will result in different obfuscation per platform.

Instead, when -seed is provided, don't use any other hash seed or salt.
This means that a particular Go name will be obfuscated the same way
as long as the seed, package path, and name itself remain constant.

In other words, when the user supplies a custom -seed,
we assume they know what they're doing in terms of storage and rotation.

Expand the README docs with more examples and detail.

Fixes #449.

											
										
										
											2 years ago
+										newName := hashWithPackage(lpkg, name)
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										flags = append(flags, fmt.Sprintf("-X=%s.%s=%s", newPath, newName, stringValue))
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+									})
-												update support for Go 1.17 in time for beta1

Back in early April we added initial support for Go 1.17,
working on a commit from master at that time. For that to work, we just
needed to add a couple of packages to runtimeRelated and tweak printFile
a bit to not break the new "//go:build" directives.

A significant amount of changes have landed since, though, and the tests
broke in multiple ways.

Most notably, the new register ABI is enabled by default for GOOS=amd64.
That affected garble indirectly in two ways: there's a new internal
package to add to runtimeRelated, and we must make reverse.txt more
clever in making its output constant across ABIs.

Another noticeable change is that Go 1.17 changes how its own version is
injected into the runtime package. It used to be via a constant in
runtime/internal/sys, such as:

	const TheVersion = `devel ...`

Since we couldn't override such constants via the linker's -X flag,
we had to directly alter the declaration while compiling.

Thankfully, Go 1.17 simply uses a "var buildVersion string" in the
runtime package, and its value is injected by the linker.
This means we can now override it with the linker's -X flag.

We make the code to alter TheVersion for Go 1.16 a bit more clever,
to not break the package when building with Go 1.17.

Finally, our hack to work around ambiguous TOOLEXEC_IMPORTPATH values
now only kicks in for non-test packages, since Go 1.17 includes our
upstream fix. Otherwise, some tests would end up with the ".test"
variant suffix added a second time:

	test/bar [test/bar.test] [test/bar [test/bar.test].test]

All the code to keep compatibility with Go 1.16.x remains in place.
We're still leaving TODOs to remind ourselves to remove it or simplify
it once we remove support for 1.16.x.

The 1.17 development freeze has already been in place for a month,
and beta1 is due to come this week, so it's unlikely that Go will change
in any considerable way at this point. Hence, we can say that support
for 1.17 is done.

Fixes #347.

											
										
										
											3 years ago
+									// Starting in Go 1.17, Go's version is implicitly injected by the linker.
 									// It's the same method as -X, so we can override it with an extra flag.
 									flags = append(flags, "-X=runtime.buildVersion=unknown")
-												strip buildid information from linked binaries

Otherwise, one can use 'go tool buildid' to obtain the main package's
build ID, which can make de-obfuscating the main package much simpler.

Fixes #43.

											
										
										
											4 years ago
+									// Ensure we strip the -buildid flag, to not leak any build IDs for the
 									// link operation or the main package's compilation.
 									flags = flagSetValue(flags, "-buildid", "")
-												speed up builds with the compiler's -dwarf=false flag

Generating DWARF in the compiler object files could take as much as 10%
extra CPU time, while we ignore it entirely at the link stage.

Speeds up 'go test -short' from ~19.5s to ~18.5s on my laptop.

											
										
										
											4 years ago
+									// Strip debug information and symbol tables.
-												start enforcing the link flags -w -s

											
										
										
											5 years ago
+									flags = append(flags, "-w", "-s")
-												reimplement import path obfuscation without goobj2 (#242)

We used to rely on a parallel implementation of an object file parser
and writer to be able to obfuscate import paths. After compiling each
package, we would parse the object file, replace the import paths, and
write the updated object file in-place.

That worked well, in most cases. Unfortunately, it had some flaws:

* Complexity. Even when most of the code is maintained in a separate
  module, the import_obfuscation.go file was still close to a thousand
  lines of code.

* Go compatibility. The object file format changes between Go releases,
  so we were supporting Go 1.15, but not 1.16. Fixing the object file
  package to work with 1.16 would probably break 1.15 support.

* Bugs. For example, we recently had to add a workaround for #224, since
  import paths containing dots after the domain would end up escaped.
  Another example is #190, which seems to be caused by the object file
  parser or writer corrupting the compiled code and causing segfaults in
  some rare edge cases.

Instead, let's drop that method entirely, and force the compiler and
linker to do the work for us. The steps necessary when compiling a
package to obfuscate are:

1) Replace its "package foo" lines with the obfuscated package path. No
   need to separate the package path and name, since the obfuscated path
   does not contain slashes.

2) Replace the "-p pkg/foo" flag with the obfuscated path.

3) Replace the "import" spec lines with the obfuscated package paths,
   for those dependencies which were obfuscated.

4) Replace the "-importcfg [...]" file with a version that uses the
   obfuscated paths instead.

The linker also needs that last step, since it also uses an importcfg
file to find object files.

There are three noteworthy drawbacks to this new method:

1) Since we no longer write object files, we can't use them to store
   data to be cached. As such, the -debugdir flag goes back to using the
   "-a" build flag to always rebuild all packages. On the plus side,
   that caching didn't work very well; see #176.

2) The package name "main" remains in all declarations under it, not
   just "func main", since we can only rename entire packages. This
   seems fine, as it gives little information to the end user.

3) The -tiny mode no longer sets all lines to 0, since it did that by
   modifying object files. As a temporary measure, we instead set all
   top-level declarations to be on line 1. A TODO is added to hopefully
   improve this again in the near future.

The upside is that we get rid of all the issues mentioned before. Plus,
garble now nearly works with Go 1.16, with the exception of two very
minor bugs that look fixable. A follow-up PR will take care of that and
start testing on 1.16.

Fixes #176.
Fixes #190.
											
										
										
											3 years ago
 									flags = flagSetValue(flags, "-importcfg", newImportCfg)
 									return append(flags, args...), nil
-												start enforcing the link flags -w -s

											
										
										
											5 years ago
+								}
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+								func splitFlagsFromArgs(all []string) (flags, args []string) {
 									for i := 0; i < len(all); i++ {
 										arg := all[i]
 										if !strings.HasPrefix(arg, "-") {
-												always use the compiler's -dwarf=false flag (#96)

First, our original append line was completely ineffective; we never
used that "flags" slice again. Second, we only attempted to use the flag
when we obfuscated a package.

In fact, we never care about debugging information here, so for any
package we compile, we can add "-dwarf=false". At the moment, we compile
all packages, even if they aren't to be obfuscated, due to the lack of
access to the build cache.

As such, we save a significant amount of work. The numbers below were
obtained on a quiet machine with "go test -bench=. -benchtime=10x", six
times before and after the change.

	name     old time/op       new time/op       delta
	Build-8        2.06s ± 4%        1.87s ± 2%  -9.21%  (p=0.002 n=6+6)

	name     old sys-time/op   new sys-time/op   delta
	Build-8        1.51s ± 2%        1.46s ± 1%  -3.12%  (p=0.004 n=6+5)

	name     old user-time/op  new user-time/op  delta
	Build-8        11.9s ± 2%        10.8s ± 1%  -8.71%  (p=0.002 n=6+6)

While at it, only do CI builds on pushes and PRs to the master branch,
so that my PRs created from the same repo don't trigger duplicate
builds.
											
										
										
											4 years ago
+											return all[:i:i], all[i:]
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+										}
 										if booleanFlags[arg] || strings.Contains(arg, "=") {
 											// Either "-bool" or "-name=value".
 											continue
 										}
 										// "-name value", so the next arg is part of this flag.
 										i++
 									}
 									return all, nil
 								}
-												remove unused code spotted by -coverprofile

Remove some asthelper APIs that haven't been used for some time.
They can be recovered from the git history if needed again.

One type assertion in the literals package is always true.

Embedded field objects are handled near the top of transformGo, so the
extra !obj.Embedded() check was always true. Remove it.

We always obfuscate standalone funcs now, so the obfuscatedTypesPackage
check is no longer necessary. This was necessary when we used to not
obfuscate func names when they were used in linkname directives.

The workaround for test package imports in obfuscatedTypesPackage I had
to add a few commits ago no longer seems to be necessary. This might be
thanks to the simplification with functions in the paragraph just above.

It's impossible to run garble without -trimpath nowadays, as we error
before the build even starts:

	$ go build -toolexec=garble
	go tool compile: exit status 1
	cannot open shared file, this is most likely due to not running "garble [command]"

When run as "garble build", the trimpath flag is always set. So the
check in alterTrimpath never triggers anymore, and couldn't be tested.

Finally, simplify the handling of comment syntax in printFile, and add a
few TODOs for other code paths not covered by our existing tests.

Total code coverage is up from 90.3% to 91.0%.

											
										
										
											3 years ago
+								func alterTrimpath(flags []string) []string {
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
+									trimpath := flagValue(flags, "-trimpath")
 									// Add our temporary dir to the beginning of -trimpath, so that we don't
 									// leak temporary dirs. Needs to be at the beginning, since there may be
 									// shorter prefixes later in the list, such as $PWD if TMPDIR=$PWD/tmp.
-												remove unused code spotted by -coverprofile

Remove some asthelper APIs that haven't been used for some time.
They can be recovered from the git history if needed again.

One type assertion in the literals package is always true.

Embedded field objects are handled near the top of transformGo, so the
extra !obj.Embedded() check was always true. Remove it.

We always obfuscate standalone funcs now, so the obfuscatedTypesPackage
check is no longer necessary. This was necessary when we used to not
obfuscate func names when they were used in linkname directives.

The workaround for test package imports in obfuscatedTypesPackage I had
to add a few commits ago no longer seems to be necessary. This might be
thanks to the simplification with functions in the paragraph just above.

It's impossible to run garble without -trimpath nowadays, as we error
before the build even starts:

	$ go build -toolexec=garble
	go tool compile: exit status 1
	cannot open shared file, this is most likely due to not running "garble [command]"

When run as "garble build", the trimpath flag is always set. So the
check in alterTrimpath never triggers anymore, and couldn't be tested.

Finally, simplify the handling of comment syntax in printFile, and add a
few TODOs for other code paths not covered by our existing tests.

Total code coverage is up from 90.3% to 91.0%.

											
										
										
											3 years ago
+									return flagSetValue(flags, "-trimpath", sharedTempDir+"=>;"+trimpath)
-												avoid reproducibility issues with full rebuilds

We were using temporary filenames for modified Go and assembly files.
For example, an obfuscated "encoding/json/encode.go" would end up as:

	/tmp/garble-shared123/encode.go.456.go

where "123" and "456" are random numbers, usually longer.

This was usually fine for two reasons:

1) We would add "/tmp/garble-shared123/" to -trimpath, so the temporary
   directory and its random number would be invisible.

2) We would add "//line" directives to the source files, replacing
   the filename with obfuscated versions excluding any random number.

Unfortunately, this broke in multiple ways. Most notably, assembly files
do not have any line directives, and it's not clear that there's any
support for them. So the random number in their basename could end up in
the binary, breaking reproducibility.

Another issue is that the -trimpath addition described above was only
done for cmd/compile, not cmd/asm, so assembly filenames included the
randomized temporary directory.

To fix the issues above, the same "encoding/json/encode.go" would now
end up as:

	/tmp/garble-shared123/encoding/json/encode.go

Such a path is still unique even though the "456" random number is gone,
as import paths are unique within a single build.

This fixes issues with the base name of each file, so we no longer rely
on line directives as the only way to remove the second original random
number.

We still rely on -trimpath to get rid of the temporary directory in
filenames. To fix its problem with assembly files, also amend the
-trimpath flag when running the assembler tool.

Finally, add a test that reproducible builds still work when a full
rebuild is done. We choose goprivate.txt for such a test as its
stdimporter package imports a number of std packages, including uses of
assembly and cgo.

For the time being, we don't use such a "full rebuild" reproducibility
test in other test scripts, as this step is expensive, rebuilding many
packages from scratch.

This issue went unnoticed for over a year because such random numbers
"123" and "456" were created when a package was obfuscated, and that
only happened once per package version as long as the build cache was
kept intact.

When clearing the build cache, or forcing a rebuild with -a, one gets
new random numbers, and thus a different binary resulting from the same
build input. That's not something that most users would do regularly,
and our tests did not cover that edge case either, until now.

Fixes #328.

											
										
										
											3 years ago
+								}
-												drop support for Go 1.20

Go 1.21.0 was released in August 2023, so our upcoming release
will no longer support the Go 1.20 release series.

The first Go 1.22 release candidate is also due in December 2023,
less than a month from now, so dropping 1.20 will simplify 1.22 work.

											
										
										
											7 months ago
+								// forwardBuildFlags is obtained from 'go help build' as of Go 1.21.
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+								var forwardBuildFlags = map[string]bool{
 									// These shouldn't be used in nested cmd/go calls.
 									"-a": false,
 									"-n": false,
 									"-x": false,
 									"-v": false,
 									// These are always set by garble.
-												remove the use of -buildinfo=false

It looks like the flag will be scrapped from Go 1.18.
Stop using it before 1.18rc1 releases without it.

See: https://github.com/golang/go/issues/50501#issuecomment-1010225207

											
										
										
											2 years ago
+									"-trimpath": false,
 									"-toolexec": false,
 									"-buildvcs": false,
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-C":             true,
-												update cmd/go flags for 1.18

As of 1.18beta1. I used bash commands like:

	diff -u <(go1.17.5 help build) <(gotip help build)

While here, supply -buildinfo=false and -buildvcs=false to go build.
We already remove that information by discarding the _gomod_.go file,
but we might as well pass the flags too.
If anything, it lets the toolchain avoid the work entirely.
Note that we can't use these flags on Go 1.17 for now, though.

Add a TODO that came to mind while writing this, too.

											
										
										
											2 years ago
+									"-asan":          true,
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+									"-asmflags":      true,
 									"-buildmode":     true,
 									"-compiler":      true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-cover":         true,
-												minor tweaks in preparation for Go 1.21

Update CI to use a newer version of Go master,
now that we're already getting release candidates.

Look at the diffs between Go 1.20 and master of `go help build`
and `go help testflag`, and add two flags that were recently added.

While here, bump a hopeful TODO for a feature request,
since that one definitely did not happen for 1.21.

											
										
										
											11 months ago
+									"-covermode":     true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-coverpkg":      true,
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+									"-gccgoflags":    true,
 									"-gcflags":       true,
 									"-installsuffix": true,
 									"-ldflags":       true,
 									"-linkshared":    true,
 									"-mod":           true,
 									"-modcacherw":    true,
 									"-modfile":       true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-msan":          true,
 									"-overlay":       true,
 									"-p":             true,
 									"-pgo":           true,
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+									"-pkgdir":        true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-race":          true,
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+									"-tags":          true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-work":          true,
-												update cmd/go flags for 1.18

As of 1.18beta1. I used bash commands like:

	diff -u <(go1.17.5 help build) <(gotip help build)

While here, supply -buildinfo=false and -buildvcs=false to go build.
We already remove that information by discarding the _gomod_.go file,
but we might as well pass the flags too.
If anything, it lets the toolchain avoid the work entirely.
Note that we can't use these flags on Go 1.17 for now, though.

Add a TODO that came to mind while writing this, too.

											
										
										
											2 years ago
+									"-workfile":      true,
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+								}
-												drop support for Go 1.20

Go 1.21.0 was released in August 2023, so our upcoming release
will no longer support the Go 1.20 release series.

The first Go 1.22 release candidate is also due in December 2023,
less than a month from now, so dropping 1.20 will simplify 1.22 work.

											
										
										
											7 months ago
+								// booleanFlags is obtained from 'go help build' and 'go help testflag' as of Go 1.21.
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+								var booleanFlags = map[string]bool{
 									// Shared build flags.
 									"-a":          true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-asan":       true,
 									"-buildvcs":   true,
 									"-cover":      true,
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+									"-i":          true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-linkshared": true,
 									"-modcacherw": true,
 									"-msan":       true,
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+									"-n":          true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-race":       true,
 									"-trimpath":   true,
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+									"-v":          true,
-												obfuscate net and runtime/debug

It appears that we already support obfuscating them,
and nothing seems to break when they are pulled in.

While here, add runtime/internal/syscall to runtimeAndDeps.
It first appeared in Go 1.18, but we missed adding it.
It seems like not having it there didn't cause any issues,
which makes sense given it's got almost zero Go code.

We also teach garble about the -work boolean build flag,
which has existed for multiple years but we forgot about.
It's likely that noone noticed as it's a rarely used flag.

											
										
										
											2 years ago
+									"-work":       true,
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+									"-x":          true,
 									// Test flags (TODO: support its special -args flag)
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-benchmem": true,
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+									"-c":        true,
 									"-failfast": true,
-												minor tweaks in preparation for Go 1.21

Update CI to use a newer version of Go master,
now that we're already getting release candidates.

Look at the diffs between Go 1.20 and master of `go help build`
and `go help testflag`, and add two flags that were recently added.

While here, bump a hopeful TODO for a feature request,
since that one definitely did not happen for 1.21.

											
										
										
											11 months ago
+									"-fullpath": true,
-												update flag tables with Go 1.20

`go help build` now has -C, -cover, -coverpkg, and -pgo.

There are no new boolean flags, but note that -cover is now a common
build flag rather than just a test flag.

While here, sort the lists so that they are easier to skim for missing
items in the future.

											
										
										
											1 year ago
+									"-json":     true,
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+									"-short":    true,
 								}
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+								func filterForwardBuildFlags(flags []string) (filtered []string, firstUnknown string) {
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+									for i := 0; i < len(flags); i++ {
 										arg := flags[i]
-												avoid obfuscating literals set via -ldflags=-X

The -X linker flag sets a string variable to a given value,
which is often used to inject strings such as versions.

The way garble's literal obfuscation works,
we replace string literals with anonymous functions which,
when evaluated, result in the original string.

Both of these features work fine separately,
but when intersecting, they break. For example, given:

	var myVar = "original"
	[...]
	-ldflags=-X=main.myVar=replaced

The -X flag effectively replaces the initial value,
and -literals adds code to be run at init time:

	var myVar = "replaced"
	func init() { myVar = func() string { ... } }

Since the init func runs later, -literals breaks -X.
To avoid that problem,
don't obfuscate literals whose variables are set via -ldflags=-X.

We also leave TODOs about obfuscating those in the future,
but we're also leaving regression tests to ensure we get it right.

Fixes #323.

											
										
										
											2 years ago
+										if strings.HasPrefix(arg, "--") {
 											arg = arg[1:] // "--name" to "-name"; keep the short form
 										}
-												slightly improve code thanks to Go 1.18 APIs

strings.Cut makes some string handling code more intuitive.
Note that we can't use it everywhere, as some places need LastIndexByte.

Start using x/exp/slices, too, which is our first use of generics.
Note that its API is experimental and may still change,
but since we are not a library, we can control its version updates.

I also noticed that we were using TrimSpace for importcfg files.
It's actually unnecessary if we swap strings.SplitAfter for Split,
as the only whitespace present was the trailing newline.

While here, I noticed an unused copy of printfWithoutPackage.

											
										
										
											2 years ago
+										name, _, _ := strings.Cut(arg, "=") // "-name=value" to "-name"
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
-												fail if we are unexpectedly overwriting files (#418)

While investigating a bug report,
I noticed that garble was writing to the same temp file twice.
At best, writing to the same path on disk twice is wasteful,
as the design is careful to be deterministic and use unique paths.
At worst, the two writes could cause races at the filesystem level.

To prevent either of those situations,
we now create files with os.OpenFile and os.O_EXCL,
meaning that we will error if the file already exists.
That change uncovered a number of such unintended cases.

First, transformAsm would write obfuscated Go files twice.
This is because the Go toolchain actually runs:

	[...]/asm -gensymabis [...] foo.s bar.s
	[...]/asm [...] foo.s bar.s

That is, the first run is only meant to generate symbol ABIs,
which are then used by the compiler.
We need to obfuscate at that first stage,
because the symbol ABI descriptions need to use obfuscated names.

However, having already obfuscated the assembly on the first stage,
there is no need to do so again on the second stage.
If we detect gensymabis is missing, we simply reuse the previous files.

This first situation doesn't seem racy,
but obfuscating the Go assembly files twice is certainly unnecessary.

Second, saveKnownReflectAPIs wrote a gob file to the build cache.
Since the build cache can be kept between builds,
and since the build cache uses reproducible paths for each build,
running the same "garble build" twice could overwrite those files.

This could actually cause races at the filesystem level;
if two concurrent builds write to the same gob file on disk,
one of them could end up using a partially-written file.

Note that this is the only of the three cases not using temporary files.
As such, it is expected that the file may already exist.
In such a case, we simply avoid overwriting it rather than failing.

Third, when "garble build -a" was used,
and when we needed an export file not listed in importcfg,
we would end up calling roughly:

	go list -export -toolexec=garble -a <dependency>

This meant we would re-build and re-obfuscate those packages.
Which is unfortunate, because the parent process already did via:

	go build -toolexec=garble -a <main>

The repeated dependency builds tripped the new os.O_EXCL check,
as we would try to overwrite the same obfuscated Go files.
Beyond being wasteful, this could again cause subtle filesystem races.
To fix the problem, avoid passing flags like "-a" to nested go commands.

Overall, we should likely be using safer ways to write to disk,
be it via either atomic writes or locked files.
However, for now, catching duplicate writes is a big step.
I have left a self-assigned TODO for further improvements.

CI on the pull request found a failure on test-gotip.
The failure reproduces on master, so it seems to be related to gotip,
and not a regression introduced by this change.
For now, disable test-gotip until we can investigate.
											
										
										
											3 years ago
+										buildFlag := forwardBuildFlags[name]
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+										if buildFlag {
 											filtered = append(filtered, arg)
-												handle unknown flags in reverse (#290)

While at it, expand the tests for build and test too.
											
										
										
											3 years ago
+										} else {
 											firstUnknown = name
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+										}
 										if booleanFlags[arg] || strings.Contains(arg, "=") {
 											// Either "-bool" or "-name=value".
 											continue
 										}
 										// "-name value", so the next arg is part of this flag.
-												properly skip non-build flags for 'go list'

If the flags list included ["-o" "binary"], we would properly skip "-o",
but we wouldn't skip "binary".

Thus, 'go list' would receive "binary" as the first argument, and assume
that's the first parameter and the end of the flags.

And add a unit test case.

Fixes #82, again.

											
										
										
											4 years ago
+										if i++; buildFlag && i < len(flags) {
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+											filtered = append(filtered, flags[i])
 										}
 									}
-												handle unknown flags in reverse (#290)

While at it, expand the tests for build and test too.
											
										
										
											3 years ago
+									return filtered, firstUnknown
-												keep build flags when calling 'go list'

Otherwise any build flags like -tags won't be used, and we might easily
end up with errors or incorrect packages.

The common case with -tags is covered by one of the integration test
scripts. On top of that, we add a table-driven unit test to cover all
edge cases, since there are many we can do quickly in a unit test.

Fixes #82.

											
										
										
											4 years ago
+								}
-												add a bit more docs

											
										
										
											5 years ago
+								// splitFlagsFromFiles splits args into a list of flag and file arguments. Since
 								// we can't rely on "--" being present, and we don't parse all flags upfront, we
 								// rely on finding the first argument that doesn't begin with "-" and that has
-												garbling imported packages starts being supported

											
										
										
											5 years ago
+								// the extension we expect for the list of paths.
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+								//
 								// This function only makes sense for lower-level tool commands, such as
 								// "compile" or "link", since their arguments are predictable.
-												support import paths ending with ".go"

When splitFlagsFromFiles saw "-p foo/bar.go",
it thought that was the first Go file, when in fact it's not.
We didn't notice because such import paths are pretty rare,
but they do exist, like github.com/nats-io/nats.go.

Before the fix, the added test case fails as expected:

	> garble build -tags buildtag
	[stderr]
	# test/main/goextension.go
	open test/main/goextension.go: no such file or directory

We could go through the trouble of teaching splitFlagsFromFiles about
all of the flags understood by the compiler and linker, but that feels
like far more code than the small alternative we went with.
And I'm pretty sure the alternative will work pretty reliably for now.

Fixes #539.

											
										
										
											2 years ago
+								//
 								// We iterate from the end rather than from the start, to better protect
 								// oursrelves from flag arguments that may look like paths, such as:
 								//
 								//	compile [flags...] -p pkg/path.go [more flags...] file1.go file2.go
 								//
 								// For now, since those confusing flags are always followed by more flags,
 								// iterating in reverse order works around them entirely.
-												reuse a single 'go list -json -export -deps' call

Instead of doing a 'go list' call every time we need to fetch a
dependency's export file, we now do a single 'go list' call before the
build begins. With the '-deps' flag, it gives us all the dependency
packages recursively.

We store that data in the gob format in a temporary file, and share it
with the future garble sub-processes via an env var.

This required lazy parsing of flags for the 'build' and 'test' commands,
since now we need to run 'go list' with the same package pattern
arguments.

Fixes #63.

											
										
										
											4 years ago
+								func splitFlagsFromFiles(all []string, ext string) (flags, paths []string) {
-												support import paths ending with ".go"

When splitFlagsFromFiles saw "-p foo/bar.go",
it thought that was the first Go file, when in fact it's not.
We didn't notice because such import paths are pretty rare,
but they do exist, like github.com/nats-io/nats.go.

Before the fix, the added test case fails as expected:

	> garble build -tags buildtag
	[stderr]
	# test/main/goextension.go
	open test/main/goextension.go: no such file or directory

We could go through the trouble of teaching splitFlagsFromFiles about
all of the flags understood by the compiler and linker, but that feels
like far more code than the small alternative we went with.
And I'm pretty sure the alternative will work pretty reliably for now.

Fixes #539.

											
										
										
											2 years ago
+									for i := len(all) - 1; i >= 0; i-- {
 										arg := all[i]
 										if strings.HasPrefix(arg, "-") || !strings.HasSuffix(arg, ext) {
 											cutoff := i + 1 // arg is a flag, not a path
 											return all[:cutoff:cutoff], all[cutoff:]
-												start enforcing the link flags -w -s

											
										
										
											5 years ago
+										}
 									}
-												support import paths ending with ".go"

When splitFlagsFromFiles saw "-p foo/bar.go",
it thought that was the first Go file, when in fact it's not.
We didn't notice because such import paths are pretty rare,
but they do exist, like github.com/nats-io/nats.go.

Before the fix, the added test case fails as expected:

	> garble build -tags buildtag
	[stderr]
	# test/main/goextension.go
	open test/main/goextension.go: no such file or directory

We could go through the trouble of teaching splitFlagsFromFiles about
all of the flags understood by the compiler and linker, but that feels
like far more code than the small alternative we went with.
And I'm pretty sure the alternative will work pretty reliably for now.

Fixes #539.

											
										
										
											2 years ago
+									return nil, all
-												initial commit

											
										
										
											5 years ago
+								}
-												error if the user forgot -trimpath

											
										
										
											5 years ago
-												add a bit more docs

											
										
										
											5 years ago
+								// flagValue retrieves the value of a flag such as "-foo", from strings in the
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+								// list of arguments like "-foo=bar" or "-foo" "bar". If the flag is repeated,
 								// the last value is returned.
-												error if the user forgot -trimpath

											
										
										
											5 years ago
+								func flagValue(flags []string, name string) string {
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+									lastVal := ""
 									flagValueIter(flags, name, func(val string) {
 										lastVal = val
 									})
 									return lastVal
 								}
 								// flagValueIter retrieves all the values for a flag such as "-foo", like
 								// flagValue. The difference is that it allows handling complex flags, such as
 								// those whose values compose a list.
 								func flagValueIter(flags []string, name string, fn func(string)) {
-												error if the user forgot -trimpath

											
										
										
											5 years ago
+									for i, arg := range flags {
 										if val := strings.TrimPrefix(arg, name+"="); val != arg {
 											// -name=value
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+											fn(val)
-												error if the user forgot -trimpath

											
										
										
											5 years ago
+										}
-												parse boolean flags differently from string flags

This is important, because "-std -foo" and "-buildid -foo" are entirely
different cases. The first is equivalent to "-std=true -foo" since the
flag is boolean, but the second is equivalent to "-buildid=-foo" since
the flag isn't boolean.

We can keep track of which of the flags we're interested in are boolean,
which isn't much extra work. Also add unit tests; the build ID is a
hash, so it's very hard to write an end-to-end test that reliably has an
ID starting with a dash.

											
										
										
											4 years ago
+										if arg == name { // -name ...
-												start hashing identifiers

											
										
										
											5 years ago
+											if i+1 < len(flags) {
-												parse boolean flags differently from string flags

This is important, because "-std -foo" and "-buildid -foo" are entirely
different cases. The first is equivalent to "-std=true -foo" since the
flag is boolean, but the second is equivalent to "-buildid=-foo" since
the flag isn't boolean.

We can keep track of which of the flags we're interested in are boolean,
which isn't much extra work. Also add unit tests; the build ID is a
hash, so it's very hard to write an end-to-end test that reliably has an
ID starting with a dash.

											
										
										
											4 years ago
+												// -name value
-												support -ldflags=-X=pkg.name=str with garbled names

Because the linker has access to all the build IDs, just like the
compiler, we can support this transparently. Add a test too.

Fixes #21.

											
										
										
											4 years ago
+												fn(flags[i+1])
-												start hashing identifiers

											
										
										
											5 years ago
+											}
-												error if the user forgot -trimpath

											
										
										
											5 years ago
+										}
 									}
 								}
-												make output binaries deterministic

We were leaking temporary file paths, which is no longer the case.

											
										
										
											5 years ago
 								func flagSetValue(flags []string, name, value string) []string {
 									for i, arg := range flags {
 										if strings.HasPrefix(arg, name+"=") {
 											// -name=value
-												parse boolean flags differently from string flags

This is important, because "-std -foo" and "-buildid -foo" are entirely
different cases. The first is equivalent to "-std=true -foo" since the
flag is boolean, but the second is equivalent to "-buildid=-foo" since
the flag isn't boolean.

We can keep track of which of the flags we're interested in are boolean,
which isn't much extra work. Also add unit tests; the build ID is a
hash, so it's very hard to write an end-to-end test that reliably has an
ID starting with a dash.

											
										
										
											4 years ago
+											flags[i] = name + "=" + value
-												make output binaries deterministic

We were leaking temporary file paths, which is no longer the case.

											
										
										
											5 years ago
+											return flags
 										}
-												parse boolean flags differently from string flags

This is important, because "-std -foo" and "-buildid -foo" are entirely
different cases. The first is equivalent to "-std=true -foo" since the
flag is boolean, but the second is equivalent to "-buildid=-foo" since
the flag isn't boolean.

We can keep track of which of the flags we're interested in are boolean,
which isn't much extra work. Also add unit tests; the build ID is a
hash, so it's very hard to write an end-to-end test that reliably has an
ID starting with a dash.

											
										
										
											4 years ago
+										if arg == name { // -name ...
 											if i+1 < len(flags) {
 												// -name value
 												flags[i+1] = value
 												return flags
-												make output binaries deterministic

We were leaking temporary file paths, which is no longer the case.

											
										
										
											5 years ago
+											}
 											return flags
 										}
 									}
 									return append(flags, name+"="+value)
 								}
-												Share data between processes via a shared file. (#192)

Previously garble heavily used env vars to share data between processes.
This also makes it easy to share complex data between processes.

The complexity of main.go is considerably reduced.
											
										
										
											4 years ago
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+								func fetchGoEnv() error {
 									out, err := exec.Command("go", "env", "-json",
-												default to GOGARBLE=*, stop using GOPRIVATE

We can drop the code that kicked in when GOGARBLE was empty.
We can also add the value in addGarbleToHash unconditionally,
as we never allow it to be empty.

In the tests, remove all GOGARBLE lines where it just meant "obfuscate
everything" or "obfuscate the entire main module".

cgo.txtar had "obfuscate everything" as a separate step,
so remove it entirely.

linkname.txtar started failing because the imported package did not
import strings, so listPackage errored out. This wasn't a problem when
strings itself wasn't obfuscated, as transformLinkname silently left
strings.IndexByte untouched. It is a problem when IndexByte does get
obfuscated. Make that kind of listPackage error visible, and fix it.

reflect.txtar started failing with "unreachable method" runtime throws.
It's not clear to me why; it appears that GOGARBLE=* makes the linker
think that ExportedMethodName is suddenly unreachable.
Work around the problem by making the method explicitly reachable,
and leave a TODO as a reminder to investigate.

Finally, gogarble.txtar no longer needs to test for GOPRIVATE.
The rest of the test is left the same, as we still want the various
values for GOGARBLE to continue to work just like before.

Fixes #594.

											
										
										
											2 years ago
+										// Keep in sync with sharedCache.GoEnv.
-												use the host's GOEXE when building the linker

We're building the linker binary for the host GOOS,
not the target GOOS that we happen to be building for.

I noticed that, after running `go test`, my garble cache
would contain both link and link.exe, which made no sense
as I run linux and not windows.

`go env` has GOHOSTOS to mirror GOOS, but there is no
GOHOSTEXE to mirror GOEXE, so we reconstruct it from
runtime.GOOS, which is equivalent to GOHOSTOS.

Add a regression test as well.

											
										
										
											1 year ago
+										"GOOS", "GOMOD", "GOVERSION", "GOROOT",
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+									).CombinedOutput()
 									if err != nil {
-												add a few TODOs with uncovered code that should not be

This will hopefully give new contributors a place to start.
Some of these, like verifying that the "help" commands work,
will be relatively simple additions to our test scripts.

											
										
										
											2 years ago
+										// TODO: cover this in the tests.
-												implement TODO to use a name variable

Slightly simplifies the main chunk of code.
While here, improve the wording for when Go isn't installed correctly.

											
										
										
											2 years ago
+										fmt.Fprintf(os.Stderr, `Can't find the Go toolchain: %v
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
-												implement TODO to use a name variable

Slightly simplifies the main chunk of code.
While here, improve the wording for when Go isn't installed correctly.

											
										
										
											2 years ago
+								This is likely due to Go not being installed/setup correctly.
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
-												implement TODO to use a name variable

Slightly simplifies the main chunk of code.
While here, improve the wording for when Go isn't installed correctly.

											
										
										
											2 years ago
+								To install Go, see: https://go.dev/doc/install
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+								`, err)
-												make "garble command -h" give command-specific help

Before, "garble build -h" would print the same as "garble -h", which is
too much and confusing, as it doesn't tell us much about "build".

Now it's far better, and includes the output of "go build -h":

	$ garble build -h
	usage: garble [garble flags] build [arguments]

	This command wraps "go build". Below is its help:

	usage: go build [-o output] [build flags] [packages]
	Run 'go help build' for details.

We do the same for "garble reverse -h", since it doesn't wrap a Go tool
command.

											
										
										
											3 years ago
+										return errJustExit(1)
-												use "go env -json" to collect env info all at once

In the worst case scenario, when GOPRIVATE isn't set at all, we would
run these three commands:

* "go env GOPRIVATE", to fetch GOPRIVATE itself
* "go list -m", for GOPRIVATE's fallback
* "go version", to check the version of Go being used

Now that we support Go 1.16 and later, all these three can be obtained
via "go env -json":

	$ go env -json GOPRIVATE GOMOD GOVERSION
	{
		"GOMOD": "/home/mvdan/src/garble/go.mod",
		"GOPRIVATE": "",
		"GOVERSION": "go1.16.3"
	}

Note that we don't get the module path directly, but we can use the
x/mod/modfile Go API to parse it from the GOMOD file cheaply.

Notably, this also simplifies our Go version checking logic, as now we
get just the version string without the "go version" prefix and
"GOOS/GOARCH" suffix we don't care about.

This makes our code a bit more maintainable and robust. When running a
short incremental build, we can also see a small speed-up, as saving two
"go" invocations can save a few milliseconds:

	name           old time/op       new time/op       delta
	Build/Cache-8        168ms ± 0%        166ms ± 1%  -1.26%  (p=0.009 n=6+6)

	name           old bin-B         new bin-B         delta
	Build/Cache-8        6.36M ± 0%        6.36M ± 0%  +0.12%  (p=0.002 n=6+6)

	name           old sys-time/op   new sys-time/op   delta
	Build/Cache-8        222ms ± 2%        219ms ± 3%    ~     (p=0.589 n=6+6)

	name           old user-time/op  new user-time/op  delta
	Build/Cache-8        857ms ± 1%        846ms ± 1%  -1.31%  (p=0.041 n=6+6)

											
										
										
											3 years ago
+									}
-												rename cache global to sharedCache

Since we will start importing github.com/rogpeppe/go-internal/cache,
and I don't want to have to rename it or leave confusion around.

											
										
										
											1 year ago
+									if err := json.Unmarshal(out, &sharedCache.GoEnv); err != nil {
-												add missing context to two unmarshal errors

Returning a json or gob error directly to the user is generally not
helpful, as it lacks any form of context.
For example, from the json unmarshal of "go env -json":

	$ garble build
	invalid character 'w' looking for beginning of value

Also improve the error when the user ran garble in the wrong way,
resulting in no shared gob file. The context is now shorter, and we also
include the os.Open error in case it contains any useful details.

While here, apply Go tip's gofmt, which reformatted a godoc list.

For #523.

											
										
										
											2 years ago
+										return fmt.Errorf(`cannot unmarshal from "go env -json": %w`, err)
-												Share data between processes via a shared file. (#192)

Previously garble heavily used env vars to share data between processes.
This also makes it easy to share complex data between processes.

The complexity of main.go is considerably reduced.
											
										
										
											4 years ago
+									}
-												start using some Go 1.22 features

We no longer need to worry about the scope of range variables,
we can iterate over integers directly, and we can use cmp.Or too.

I haven't paid close attention to using these everywhere.
This is mainly testing out the new features where I saw some benefit.

											
										
										
											4 months ago
+									sharedCache.GOGARBLE = cmp.Or(os.Getenv("GOGARBLE"), "*") // we default to obfuscating everything
-												Share data between processes via a shared file. (#192)

Previously garble heavily used env vars to share data between processes.
This also makes it easy to share complex data between processes.

The complexity of main.go is considerably reduced.
											
										
										
											4 years ago
+									return nil
 								}