You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
garble/main_test.go

499 lines
13 KiB
Go

// Copyright (c) 2019, The Garble Authors.
// See LICENSE for licensing information.
5 years ago
package main
import (
"flag"
"fmt"
"go/ast"
"go/printer"
"go/token"
"io/fs"
mathrand "math/rand"
5 years ago
"os"
"os/exec"
5 years ago
"path/filepath"
"regexp"
"runtime"
"strings"
5 years ago
"testing"
"time"
5 years ago
"github.com/google/go-cmp/cmp"
"github.com/rogpeppe/go-internal/goproxytest"
"github.com/rogpeppe/go-internal/gotooltest"
5 years ago
"github.com/rogpeppe/go-internal/testscript"
ah "mvdan.cc/garble/internal/asthelper"
5 years ago
)
// proxyURL is the URL of the Go module proxy server used by the test scripts.
// It is assigned by garbleMain.Run before the tests start running,
// and TestScript exposes it to each script as GOPROXY.
var proxyURL string
5 years ago
func TestMain(m *testing.M) {
make `go test -race` fast again on Go 1.21 On my laptop, `go test -short -race` on Go 1.20 used to take about 62s. Jumping to Go 1.21, I was surprised to see an increase to 152s, more than double - which was weird given how often the CPU was idle. This manifested when updating our CI to start testing on Go 1.21. Where Go 1.20 on Linux took about 10m to run `go test -race`, Go 1.21 hit the 20m timeout every single time. After a bit of googling, I was reminded of https://go.dev/issues/20364 as well as https://go.dev/doc/articles/race_detector#Options: atexit_sleep_ms (default 1000): Amount of milliseconds to sleep in the main goroutine before exiting. This default is a bit aggressive for Go, but usually harmless, having each test binary sleep for 1s after the package has been tested. However, this 1s sleep after main runs is horrendous for garble's tests; the testscripts run `garble build` many times, running the test binary. It then runs `go build -toolexec=garble`, which runs the test binary many more times: for every compiler, linker, etc invocation. This means that our testscripts would include dozens of 1s sleeps, in many cases blocking the continuation of the entire test. This seemed to not be happening on earlier Go versions due to a bug; Go 1.21's race mode started obeying this default properly. The added change sets atexit_sleep_ms to something more reasonable if GORACE isn't set at all; 10ms doesn't disable this check entirely, but its overhead is orders of magnitude less noticeable than 1000ms. `go test -short -race` on Go 1.21 drops back down to 68s for me.
8 months ago
// If GORACE is unset, lower the default of atexit_sleep_ms=1000,
// since otherwise every execution of garble through the test binary
// would sleep for one second before exiting.
// Given how many times garble runs via toolexec, that is very slow!
// If GORACE is set, we assume that the caller knows what they are doing,
// and we don't try to replace or modify their flags.
if os.Getenv("GORACE") == "" {
os.Setenv("GORACE", "atexit_sleep_ms=10")
}
if os.Getenv("RUN_GARBLE_MAIN") == "true" {
os.Exit(main1())
}
os.Exit(testscript.RunMain(garbleMain{m}, map[string]func() int{
5 years ago
"garble": main1,
}))
}
// garbleMain wraps a *testing.M so that shared setup can happen
// right before the tests themselves are run.
type garbleMain struct {
	m *testing.M
}

// Run starts the Go proxy server shared by all tests, records its URL in
// proxyURL, and then runs the wrapped tests, returning their exit code.
// It panics if the proxy server cannot be started.
func (gm garbleMain) Run() int {
	proxy, err := goproxytest.NewServer("testdata/mod", "")
	if err != nil {
		msg := fmt.Sprintf("cannot start proxy: %v", err)
		panic(msg)
	}
	proxyURL = proxy.URL

	return gm.m.Run()
}
5 years ago
// update is the -u test flag. TestScript forwards its value to testscript
// as the UpdateScripts parameter.
var update = flag.Bool("u", false, "update testscript output files")
func TestScript(t *testing.T) {
5 years ago
t.Parallel()
execPath, err := os.Executable()
if err != nil {
t.Fatal(err)
}
tempCacheDir := t.TempDir()
hostCacheDir, err := os.UserCacheDir()
if err != nil {
t.Fatal(err)
}
p := testscript.Params{
Dir: filepath.Join("testdata", "script"),
5 years ago
Setup: func(env *testscript.Env) error {
// Use testdata/mod as our module proxy.
env.Setenv("GOPROXY", proxyURL)
// We use our own proxy, so avoid sum.golang.org.
env.Setenv("GONOSUMDB", "*")
// "go build" starts many short-lived Go processes,
// such as asm, buildid, compile, and link.
// They don't allocate huge amounts of memory,
// and they'll exit within seconds,
// so using the GC is basically a waste of CPU.
// Turn it off entirely, releasing memory on exit.
//
// We don't want this setting always on,
// as it could result in memory problems for users.
// But it helps for our test suite,
// as the packages are relatively small.
env.Setenv("GOGC", "off")
env.Setenv("gofullversion", runtime.Version())
env.Setenv("EXEC_PATH", execPath)
if os.Getenv("GOCOVERDIR") != "" {
// Don't share cache dirs with the host if we want to collect code
// coverage. Otherwise, the coverage info might be incomplete.
env.Setenv("GOCACHE", filepath.Join(tempCacheDir, "go-cache"))
replace our caching inside GOCACHE with GARBLE_CACHE For each Go package we obfuscate, we need to store information about how we obfuscated it, which is needed when obfuscating its dependents. For example, if A depends on B to use the type B.Foo, A needs to know whether or not B.Foo was obfuscated; it depends on B's use of reflect. We record this information in a gob file, which is cached on disk. To avoid rolling our own custom cache, and since garble is so closely connected with cmd/go already, we piggybacked off of Go's GOCACHE. In particular, for each build cache entry per `go list`'s Export field, we would store a "garble" sibling file with that gob content. However, this was brittle for two reasons: 1) We were doing this without cmd/go's permission or knowledge. We were careful to use filename suffixes similar to Export files, meaning that `go clean` and other commands would treat them the same. However, this could confuse cmd/go at any point in the future. 2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of the build and test caches under control. Right now, this means that every 24h, any file not accessed in the last five days is deleted. However, that trimming heuristic is done per-file. If the trimming removed Garble's sibling file but not the original Export file, this could cause errors such as "cannot load garble export file" which users already ran into. Instead, start using github.com/rogpeppe/go-internal/cache, an exported copy of cmd/go's own cache implementation for GOCACHE. Since we need an entirely separate directory, we introduce GARBLE_CACHE, defaulting to the "garble" directory inside the user's cache directory. For example, on Linux this would be ~/.cache/garble. Inside GARBLE_CACHE, our gob file cache will be under "build", which helps clarify that this cache is used when obfuscating Go builds, and allows placing other kinds of caches inside GARBLE_CACHE. 
For example, we already have a need for storing linker binaries, which for now still use their own caching mechanism. This commit does not make our cache properly resistant to removed files. The proof is that our seed.txtar testscript still fails the second case. However, we do rewrite all of our caching logic away from Export files, which in itself is a considerable refactor, and we add a few TODOs. One notable change is how we load gob files from dependencies when building the cache entry for the current package. We used to load the gob files from all packages in the Deps field. However, that is the list of all _transitive_ dependencies. Since these gob files are already flat, meaning they contain information about all of their transitive dependencies as well, we need only load the gob files from the direct dependencies, the Imports field. Performance is largely unchanged, since the behavior is similar. However, the change from Deps to Imports saves us some work, which can be seen in the reduced mallocs per obfuscated build. It's unclear why the binary size isn't stable. When reverting the Deps to Imports change, it then settles at 5.386Mi, which is almost exactly in between the two measurements below. I'm not sure why, but that metric appears to be slightly unstable. goos: linux goarch: amd64 pkg: mvdan.cc/garble cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics │ old │ new │ │ sec/op │ sec/op vs base │ Build-8 11.09 ± 1% 11.08 ± 1% ~ (p=0.796 n=10) │ old │ new │ │ bin-B │ bin-B vs base │ Build-8 5.390Mi ± 0% 5.382Mi ± 0% -0.14% (p=0.000 n=10) │ old │ new │ │ cached-sec/op │ cached-sec/op vs base │ Build-8 415.5m ± 4% 421.6m ± 1% ~ (p=0.190 n=10) │ old │ new │ │ mallocs/op │ mallocs/op vs base │ Build-8 35.43M ± 0% 34.05M ± 0% -3.89% (p=0.000 n=10) │ old │ new │ │ sys-sec/op │ sys-sec/op vs base │ Build-8 5.662 ± 1% 5.701 ± 2% ~ (p=0.280 n=10)
12 months ago
env.Setenv("GARBLE_CACHE", filepath.Join(tempCacheDir, "garble-cache"))
} else {
// GOCACHE is initialized by gotooltest to use the host's cache.
replace our caching inside GOCACHE with GARBLE_CACHE For each Go package we obfuscate, we need to store information about how we obfuscated it, which is needed when obfuscating its dependents. For example, if A depends on B to use the type B.Foo, A needs to know whether or not B.Foo was obfuscated; it depends on B's use of reflect. We record this information in a gob file, which is cached on disk. To avoid rolling our own custom cache, and since garble is so closely connected with cmd/go already, we piggybacked off of Go's GOCACHE. In particular, for each build cache entry per `go list`'s Export field, we would store a "garble" sibling file with that gob content. However, this was brittle for two reasons: 1) We were doing this without cmd/go's permission or knowledge. We were careful to use filename suffixes similar to Export files, meaning that `go clean` and other commands would treat them the same. However, this could confuse cmd/go at any point in the future. 2) cmd/go trims cache entries in GOCACHE regularly, to keep the size of the build and test caches under control. Right now, this means that every 24h, any file not accessed in the last five days is deleted. However, that trimming heuristic is done per-file. If the trimming removed Garble's sibling file but not the original Export file, this could cause errors such as "cannot load garble export file" which users already ran into. Instead, start using github.com/rogpeppe/go-internal/cache, an exported copy of cmd/go's own cache implementation for GOCACHE. Since we need an entirely separate directory, we introduce GARBLE_CACHE, defaulting to the "garble" directory inside the user's cache directory. For example, on Linux this would be ~/.cache/garble. Inside GARBLE_CACHE, our gob file cache will be under "build", which helps clarify that this cache is used when obfuscating Go builds, and allows placing other kinds of caches inside GARBLE_CACHE. 
For example, we already have a need for storing linker binaries, which for now still use their own caching mechanism. This commit does not make our cache properly resistant to removed files. The proof is that our seed.txtar testscript still fails the second case. However, we do rewrite all of our caching logic away from Export files, which in itself is a considerable refactor, and we add a few TODOs. One notable change is how we load gob files from dependencies when building the cache entry for the current package. We used to load the gob files from all packages in the Deps field. However, that is the list of all _transitive_ dependencies. Since these gob files are already flat, meaning they contain information about all of their transitive dependencies as well, we need only load the gob files from the direct dependencies, the Imports field. Performance is largely unchanged, since the behavior is similar. However, the change from Deps to Imports saves us some work, which can be seen in the reduced mallocs per obfuscated build. It's unclear why the binary size isn't stable. When reverting the Deps to Imports change, it then settles at 5.386Mi, which is almost exactly in between the two measurements below. I'm not sure why, but that metric appears to be slightly unstable. goos: linux goarch: amd64 pkg: mvdan.cc/garble cpu: AMD Ryzen 7 PRO 5850U with Radeon Graphics │ old │ new │ │ sec/op │ sec/op vs base │ Build-8 11.09 ± 1% 11.08 ± 1% ~ (p=0.796 n=10) │ old │ new │ │ bin-B │ bin-B vs base │ Build-8 5.390Mi ± 0% 5.382Mi ± 0% -0.14% (p=0.000 n=10) │ old │ new │ │ cached-sec/op │ cached-sec/op vs base │ Build-8 415.5m ± 4% 421.6m ± 1% ~ (p=0.190 n=10) │ old │ new │ │ mallocs/op │ mallocs/op vs base │ Build-8 35.43M ± 0% 34.05M ± 0% -3.89% (p=0.000 n=10) │ old │ new │ │ sys-sec/op │ sys-sec/op vs base │ Build-8 5.662 ± 1% 5.701 ± 2% ~ (p=0.280 n=10)
12 months ago
env.Setenv("GARBLE_CACHE", filepath.Join(hostCacheDir, "garble"))
}
5 years ago
return nil
},
// TODO: this condition should probably be supported by gotooltest
Condition: func(cond string) (bool, error) {
switch cond {
case "cgo":
out, err := exec.Command("go", "env", "CGO_ENABLED").CombinedOutput()
if err != nil {
return false, err
}
result := strings.TrimSpace(string(out))
switch result {
case "0", "1":
return result == "1", nil
default:
return false, fmt.Errorf("unknown CGO_ENABLED: %q", result)
}
}
return false, fmt.Errorf("unknown condition")
},
Cmds: map[string]func(ts *testscript.TestScript, neg bool, args []string){
"sleep": sleep,
"binsubstr": binsubstr,
"bincmp": bincmp,
"generate-literals": generateLiterals,
"setenvfile": setenvfile,
"grepfiles": grepfiles,
},
UpdateScripts: *update,
RequireExplicitExec: true,
RequireUniqueNames: true,
}
if err := gotooltest.Setup(&p); err != nil {
t.Fatal(err)
}
testscript.Run(t, p)
5 years ago
}
// createFile creates the file at path, made absolute via ts.MkAbs,
// and returns it open for writing. The script fails if creation fails.
// Closing the returned file is up to the caller.
func createFile(ts *testscript.TestScript, path string) *os.File {
	abs := ts.MkAbs(path)
	f, err := os.Create(abs)
	if err != nil {
		ts.Fatalf("%v", err)
	}
	return f
}
// sleep is akin to a shell's sleep builtin.
// Note that tests should almost never use this; it's currently only used to
// work around a low-level Go syscall race on Linux.
//
// Usage: sleep duration
//
// The duration is parsed with time.ParseDuration, e.g. "100ms" or "1s".
// Negation is rejected, like in the other commands which cannot fail in a
// meaningful way, rather than being silently ignored.
func sleep(ts *testscript.TestScript, neg bool, args []string) {
	if neg {
		ts.Fatalf("unsupported: ! sleep")
	}
	if len(args) != 1 {
		ts.Fatalf("usage: sleep duration")
	}
	d, err := time.ParseDuration(args[0])
	if err != nil {
		ts.Fatalf("%v", err)
	}
	time.Sleep(d)
}
func binsubstr(ts *testscript.TestScript, neg bool, args []string) {
if len(args) < 2 {
ts.Fatalf("usage: binsubstr file substr...")
}
fix a data race with the global cachedBinary mechanism (#413) Spotted by our friend "go test -race": WARNING: DATA RACE Write at 0x0000010522d8 by goroutine 69: mvdan.cc/garble.readFile() garble/main_test.go:124 +0x23a mvdan.cc/garble.binsubstr() garble/main_test.go:141 +0xc4 github.com/rogpeppe/go-internal/testscript.(*TestScript).run() github.com/rogpeppe/go-internal@v1.8.1-0.20211023094830-115ce09fd6b4/testscript/testscript.go:496 +0x9e8 [...] Previous write at 0x0000010522d8 by goroutine 60: mvdan.cc/garble.readFile() garble/main_test.go:124 +0x23a mvdan.cc/garble.binsubstr() garble/main_test.go:141 +0xc4 github.com/rogpeppe/go-internal/testscript.(*TestScript).run() github.com/rogpeppe/go-internal@v1.8.1-0.20211023094830-115ce09fd6b4/testscript/testscript.go:496 +0x9e8 [...] This wasn't a data race that we spotted via failures in practice, as it only affected test code since July. The race is due to the fact that each test script runs as a parallel sub-test within the same Go program, sharing all globals. As such, a single "cached binary" global is read and written with races. Moreover, note that the caching always missed. I briefly rewrote the code to avoid the race via a sync.Map keyed by absolute filenames, and while that removed the data race, the caching never actually hit. To have a cache hit, we need an absolute path to already be in the cache and for it to not have been modified since it was last cached. That is: modify-bin-1 foo binsubstr foo 'abc' # miss binsubstr foo 'def' # hit; use the cached "/tmp/[...]/foo" entry modify-bin-2 foo binsubstr foo 'abc' # miss However, the test scripts don't do contiguous binsubstr calls like these. Instead, they join repeated binsubstr calls: modify-bin-1 foo binsubstr foo 'abc' 'def' # miss modify-bin-2 foo binsubstr foo 'abc' # miss For that reason, remove the extra code entirely. 
I didn't notice any change to the performance of "go test -short" with a warm build cache, with: go test -c ./garble.test -test.short #warm cache benchcmd -n 5 TestShort ./garble.test -test.short name old time/op new time/op delta TestShort 4.62s ±12% 4.35s ±12% ~ (p=0.310 n=5+5) name old user-time/op new user-time/op delta TestShort 16.8s ± 3% 16.7s ± 3% ~ (p=0.690 n=5+5) name old sys-time/op new sys-time/op delta TestShort 7.28s ± 1% 7.26s ± 2% ~ (p=0.841 n=5+5) name old peak-RSS-bytes new peak-RSS-bytes delta TestShort 305MB ± 0% 306MB ± 0% ~ (p=0.421 n=5+5) Finally, start using "go test -race" on Linux on CI, which should have made the PR back in July red before merging.
3 years ago
data := ts.ReadFile(args[0])
var failed []string
for _, substr := range args[1:] {
match := strings.Contains(data, substr)
if match && neg {
failed = append(failed, substr)
} else if !match && !neg {
failed = append(failed, substr)
}
}
if len(failed) > 0 && neg {
ts.Fatalf("unexpected match for %q in %s", failed, args[0])
} else if len(failed) > 0 {
ts.Fatalf("expected match for %q in %s", failed, args[0])
}
}
// bincmp checks that two binary files have identical contents.
// When negated, it checks that they differ.
//
// On an unexpected difference, it runs diffoscope on the files if it is
// available, writes copies of both files into the "bincmp_output" directory,
// and fails the script reporting the difference in size.
//
// Usage: bincmp file1 file2
func bincmp(ts *testscript.TestScript, neg bool, args []string) {
	if len(args) != 2 {
		ts.Fatalf("usage: bincmp file1 file2")
	}
	for _, arg := range args {
		if arg == "stdout" || arg == "stderr" {
			// Note that the diffoscope call below would not deal with
			// stdout/stderr either.
			ts.Fatalf("bincmp is for binary files. did you mean cmp?")
		}
	}

	name1, name2 := args[0], args[1]
	data1 := ts.ReadFile(name1)
	data2 := ts.ReadFile(name2)
	same := data1 == data2

	if neg {
		if same {
			ts.Fatalf("%s and %s don't differ", name1, name2)
		}
		return
	}
	if same {
		return
	}

	if _, err := exec.LookPath("diffoscope"); err != nil {
		ts.Logf("diffoscope not found; skipping")
	} else {
		// We'll error below; ignore the exec error here.
		ts.Exec("diffoscope",
			"--diff-context", "2", // down from 7 by default
			"--max-text-report-size", "4096", // no limit (in bytes) by default; avoid huge output
			ts.MkAbs(name1), ts.MkAbs(name2))
	}

	outDir := "bincmp_output"
	ts.Check(os.MkdirAll(outDir, 0o777))

	// dump writes data to a new temporary file inside outDir,
	// returning the name of the file which was created.
	dump := func(pattern, data string) string {
		f, err := os.CreateTemp(outDir, pattern)
		ts.Check(err)
		_, err = f.WriteString(data)
		ts.Check(err)
		ts.Check(f.Close())
		return f.Name()
	}
	path1 := dump("file1-*", data1)
	path2 := dump("file2-*", data2)
	ts.Logf("wrote files to %s and %s", path1, path2)

	ts.Fatalf("%s and %s differ; diffoscope above, size diff: %+d",
		name1, name2, len(data2)-len(data1))
}
// testRand is the pseudo-random source used to generate random literals.
// It is seeded with the current time when the test binary starts.
var testRand = mathrand.New(mathrand.NewSource(time.Now().UnixNano()))
// generateStringLit returns a string literal consisting of minSize random
// bytes from testRand, followed by a fixed marker string shared by all
// generated literals. It panics if reading random bytes fails.
func generateStringLit(minSize int) *ast.BasicLit {
	const marker = "a_unique_string_that_is_part_of_all_extra_literals"

	random := make([]byte, minSize)
	if _, err := testRand.Read(random); err != nil {
		panic(err)
	}
	return ah.StringLit(string(random) + marker)
}
// generateLiterals creates a new source code file with a few random literals inside.
// All literals contain the string "a_unique_string_that_is_part_of_all_extra_literals"
// so we can later check if they are all obfuscated by looking for this substring.
// The code is designed such that the Go compiler does not optimize away the literals,
// which would destroy the test.
// This is achieved by defining a global variable `var x = ""` and an `init` function
// which appends all literals to `x`.
func generateLiterals(ts *testscript.TestScript, neg bool, args []string) {
if neg {
ts.Fatalf("unsupported: ! generate-literals")
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
3 years ago
if len(args) != 1 {
ts.Fatalf("usage: generate-literals file")
}
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
3 years ago
codePath := args[0]
// Global string variable to which which we append string literals: `var x = ""`
globalVar := &ast.GenDecl{
Tok: token.VAR,
Specs: []ast.Spec{
&ast.ValueSpec{
Names: []*ast.Ident{ast.NewIdent("x")},
Values: []ast.Expr{
&ast.BasicLit{Kind: token.STRING, Value: `""`},
},
},
},
}
var statements []ast.Stmt
// Assignments which append 100 random small literals to x: `x += "the_small_random_literal"`
for range 100 {
statements = append(
statements,
&ast.AssignStmt{
Lhs: []ast.Expr{ast.NewIdent("x")},
Tok: token.ADD_ASSIGN,
Rhs: []ast.Expr{generateStringLit(1 + testRand.Intn(255))},
},
)
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
3 years ago
}
// Assignments which append 5 random huge literals to x: `x += "the_huge_random_literal"`
// We add huge literals to make sure we obfuscate them fast.
do not try to obfuscate huge literals (#204) It's common for asset bundling code generators to produce huge literals, for example in strings. Our literal obfuscators are meant for relatively small string-like literals that a human would write, such as URLs, file paths, and English text. I ran some quick experiments, and it seems like "garble build -literals" appears to hang trying to obfuscate literals starting at 5-20KiB. It's not really hung; it's just doing a lot of busy work obfuscating those literals. The code it produces is also far from ideal, so it also takes some time to finally compile. The generated code also led to crashes. For example, using "garble build -literals -tiny" on a package containing literals of over a megabyte, our use of asthelper to remove comments and shuffle line numbers could run out of stack memory. This all points in one direction: we never designed "-literals" to deal with large sizes. Set a source-code-size limit of 2KiB. We alter the literals.txt test as well, to include a few 128KiB string literals. Before this fix, "go test" would seemingly hang on that test for over a minute (I did not wait any longer). With the fix, those large literals are not obfuscated, so the test ends in its usual 1-3s. As said in the const comment, I don't believe any of this is a big problem. Come Go 1.16, most developers should stop using asset-bundling code generators and use go:embed instead. If we wanted to somehow obfuscate those, it would be an entirely separate feature. And, if someone wants to work on obfuscating truly large literals for any reason, we need good tests and benchmarks to ensure garble does not consume CPU for minutes or run out of memory. I also simplified the generate-literals test command. The only argument that matters to the script is the filename, since it's used later on. Fixes #178.
3 years ago
// 5 * 128KiB is large enough that it would take a very, very long time
// to obfuscate those literals if too complex obfuscators are used.
for range 5 {
statements = append(
statements,
&ast.AssignStmt{
Lhs: []ast.Expr{ast.NewIdent("x")},
Tok: token.ADD_ASSIGN,
Rhs: []ast.Expr{generateStringLit(128 << 10)},
},
)
}
// An `init` function which includes all assignments from above
initFunc := &ast.FuncDecl{
Name: &ast.Ident{
Name: "init",
},
Type: &ast.FuncType{},
Body: ah.BlockStmt(statements...),
}
// A file with the global string variable and init function
file := &ast.File{
Name: ast.NewIdent("main"),
Decls: []ast.Decl{
globalVar,
initFunc,
},
}
codeFile := createFile(ts, codePath)
defer codeFile.Close()
if err := printer.Fprint(codeFile, token.NewFileSet(), file); err != nil {
ts.Fatalf("%v", err)
}
}
// setenvfile sets an environment variable of the script to the contents of a file.
// Negation is not supported.
//
// Usage: setenvfile name file
func setenvfile(ts *testscript.TestScript, neg bool, args []string) {
	switch {
	case neg:
		ts.Fatalf("unsupported: ! setenvfile")
	case len(args) != 2:
		ts.Fatalf("usage: setenvfile name file")
	}
	name, file := args[0], args[1]
	ts.Setenv(name, ts.ReadFile(file))
}
// grepfiles walks the file tree rooted at a path and checks that at least one
// of the visited paths matches a regular expression.
// When negated, it checks that none of the visited paths match.
//
// The pattern is matched against each full path as reported by
// filepath.WalkDir, which includes the root made absolute via ts.MkAbs.
// An invalid pattern fails the script rather than panicking the test binary.
//
// Usage: grepfiles path pattern
func grepfiles(ts *testscript.TestScript, neg bool, args []string) {
	if len(args) != 2 {
		ts.Fatalf("usage: grepfiles path pattern")
	}
	root, pattern := ts.MkAbs(args[0]), args[1]

	// The pattern comes from the script, so report a bad one as a regular
	// script failure instead of using MustCompile.
	rx, err := regexp.Compile(pattern)
	if err != nil {
		ts.Fatalf("%v", err)
	}

	anyFound := false
	err = filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !rx.MatchString(path) {
			return nil
		}
		if neg {
			return fmt.Errorf("%q matches %q", path, pattern)
		}
		// One match is all we need; stop walking early.
		anyFound = true
		return fs.SkipAll
	})
	if err != nil {
		ts.Fatalf("%s", err)
	}
	if !neg && !anyFound {
		ts.Fatalf("no matches for %q", pattern)
	}
}
// TestSplitFlagsFromArgs checks that splitFlagsFromArgs separates a command
// line into its leading flags and its trailing arguments.
func TestSplitFlagsFromArgs(t *testing.T) {
	t.Parallel()

	type testCase struct {
		name string
		args []string
		want [2][]string // the expected flags, followed by the expected args
	}
	cases := []testCase{
		{
			name: "Empty",
			args: []string{},
			want: [2][]string{{}, nil},
		},
		{
			name: "JustFlags",
			args: []string{"-foo", "bar", "-baz"},
			want: [2][]string{{"-foo", "bar", "-baz"}, nil},
		},
		{
			name: "JustArgs",
			args: []string{"some", "pkgs"},
			want: [2][]string{{}, {"some", "pkgs"}},
		},
		{
			name: "FlagsAndArgs",
			args: []string{"-foo=bar", "baz"},
			want: [2][]string{{"-foo=bar"}, {"baz"}},
		},
		{
			name: "BoolFlagsAndArgs",
			args: []string{"-race", "pkg"},
			want: [2][]string{{"-race"}, {"pkg"}},
		},
		{
			name: "ExplicitBoolFlag",
			args: []string{"-race=true", "pkg"},
			want: [2][]string{{"-race=true"}, {"pkg"}},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			gotFlags, gotArgs := splitFlagsFromArgs(tc.args)
			got := [2][]string{gotFlags, gotArgs}

			diff := cmp.Diff(tc.want, got)
			if diff != "" {
				t.Fatalf("splitFlagsFromArgs(%q) mismatch (-want +got):\n%s", tc.args, diff)
			}
		})
	}
}
fail if we are unexpectedly overwriting files (#418) While investigating a bug report, I noticed that garble was writing to the same temp file twice. At best, writing to the same path on disk twice is wasteful, as the design is careful to be deterministic and use unique paths. At worst, the two writes could cause races at the filesystem level. To prevent either of those situations, we now create files with os.OpenFile and os.O_EXCL, meaning that we will error if the file already exists. That change uncovered a number of such unintended cases. First, transformAsm would write obfuscated Go files twice. This is because the Go toolchain actually runs: [...]/asm -gensymabis [...] foo.s bar.s [...]/asm [...] foo.s bar.s That is, the first run is only meant to generate symbol ABIs, which are then used by the compiler. We need to obfuscate at that first stage, because the symbol ABI descriptions need to use obfuscated names. However, having already obfuscated the assembly on the first stage, there is no need to do so again on the second stage. If we detect gensymabis is missing, we simply reuse the previous files. This first situation doesn't seem racy, but obfuscating the Go assembly files twice is certainly unnecessary. Second, saveKnownReflectAPIs wrote a gob file to the build cache. Since the build cache can be kept between builds, and since the build cache uses reproducible paths for each build, running the same "garble build" twice could overwrite those files. This could actually cause races at the filesystem level; if two concurrent builds write to the same gob file on disk, one of them could end up using a partially-written file. Note that this is the only one of the three cases not using temporary files. As such, it is expected that the file may already exist. In such a case, we simply avoid overwriting it rather than failing. 
Third, when "garble build -a" was used, and when we needed an export file not listed in importcfg, we would end up calling roughly: go list -export -toolexec=garble -a <dependency> This meant we would re-build and re-obfuscate those packages. Which is unfortunate, because the parent process already did via: go build -toolexec=garble -a <main> The repeated dependency builds tripped the new os.O_EXCL check, as we would try to overwrite the same obfuscated Go files. Beyond being wasteful, this could again cause subtle filesystem races. To fix the problem, avoid passing flags like "-a" to nested go commands. Overall, we should likely be using safer ways to write to disk, be it via either atomic writes or locked files. However, for now, catching duplicate writes is a big step. I have left a self-assigned TODO for further improvements. CI on the pull request found a failure on test-gotip. The failure reproduces on master, so it seems to be related to gotip, and not a regression introduced by this change. For now, disable test-gotip until we can investigate.
3 years ago
func TestFilterForwardBuildFlags(t *testing.T) {
t.Parallel()
tests := []struct {
name string
flags []string
want []string
}{
{"Empty", []string{}, nil},
{
"NoBuild",
[]string{"-short", "-json"},
nil,
},
{
"Mixed",
[]string{"-short", "-tags", "foo", "-mod=readonly", "-json"},
[]string{"-tags", "foo", "-mod=readonly"},
},
{
"NonBinarySkipped",
[]string{"-o", "binary", "-tags", "foo"},
[]string{"-tags", "foo"},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
t.Parallel()
fail if we are unexpectedly overwriting files (#418) While investigating a bug report, I noticed that garble was writing to the same temp file twice. At best, writing to the same path on disk twice is wasteful, as the design is careful to be deterministic and use unique paths. At worst, the two writes could cause races at the filesystem level. To prevent either of those situations, we now create files with os.OpenFile and os.O_EXCL, meaning that we will error if the file already exists. That change uncovered a number of such unintended cases. First, transformAsm would write obfuscated Go files twice. This is because the Go toolchain actually runs: [...]/asm -gensymabis [...] foo.s bar.s [...]/asm [...] foo.s bar.s That is, the first run is only meant to generate symbol ABIs, which are then used by the compiler. We need to obfuscate at that first stage, because the symbol ABI descriptions need to use obfuscated names. However, having already obfuscated the assembly on the first stage, there is no need to do so again on the second stage. If we detect gensymabis is missing, we simply reuse the previous files. This first situation doesn't seem racy, but obfuscating the Go assembly files twice is certainly unnecessary. Second, saveKnownReflectAPIs wrote a gob file to the build cache. Since the build cache can be kept between builds, and since the build cache uses reproducible paths for each build, running the same "garble build" twice could overwrite those files. This could actually cause races at the filesystem level; if two concurrent builds write to the same gob file on disk, one of them could end up using a partially-written file. Note that this is the only of the three cases not using temporary files. As such, it is expected that the file may already exist. In such a case, we simply avoid overwriting it rather than failing. 
Third, when "garble build -a" was used, and when we needed an export file not listed in importcfg, we would end up calling roughly: go list -export -toolexec=garble -a <dependency> This meant we would re-build and re-obfuscate those packages. Which is unfortunate, because the parent process already did via: go build -toolexec=garble -a <main> The repeated dependency builds tripped the new os.O_EXCL check, as we would try to overwrite the same obfuscated Go files. Beyond being wasteful, this could again cause subtle filesystem races. To fix the problem, avoid passing flags like "-a" to nested go commands. Overall, we should likely be using safer ways to write to disk, be it via either atomic writes or locked files. However, for now, catching duplicate writes is a big step. I have left a self-assigned TODO for further improvements. CI on the pull request found a failure on test-gotip. The failure reproduces on master, so it seems to be related to gotip, and not a regression introduced by this change. For now, disable test-gotip until we can investigate.
3 years ago
got, _ := filterForwardBuildFlags(test.flags)
if diff := cmp.Diff(test.want, got); diff != "" {
fail if we are unexpectedly overwriting files (#418) While investigating a bug report, I noticed that garble was writing to the same temp file twice. At best, writing to the same path on disk twice is wasteful, as the design is careful to be deterministic and use unique paths. At worst, the two writes could cause races at the filesystem level. To prevent either of those situations, we now create files with os.OpenFile and os.O_EXCL, meaning that we will error if the file already exists. That change uncovered a number of such unintended cases. First, transformAsm would write obfuscated Go files twice. This is because the Go toolchain actually runs: [...]/asm -gensymabis [...] foo.s bar.s [...]/asm [...] foo.s bar.s That is, the first run is only meant to generate symbol ABIs, which are then used by the compiler. We need to obfuscate at that first stage, because the symbol ABI descriptions need to use obfuscated names. However, having already obfuscated the assembly on the first stage, there is no need to do so again on the second stage. If we detect gensymabis is missing, we simply reuse the previous files. This first situation doesn't seem racy, but obfuscating the Go assembly files twice is certainly unnecessary. Second, saveKnownReflectAPIs wrote a gob file to the build cache. Since the build cache can be kept between builds, and since the build cache uses reproducible paths for each build, running the same "garble build" twice could overwrite those files. This could actually cause races at the filesystem level; if two concurrent builds write to the same gob file on disk, one of them could end up using a partially-written file. Note that this is the only of the three cases not using temporary files. As such, it is expected that the file may already exist. In such a case, we simply avoid overwriting it rather than failing. 
Third, when "garble build -a" was used, and when we needed an export file not listed in importcfg, we would end up calling roughly: go list -export -toolexec=garble -a <dependency> This meant we would re-build and re-obfuscate those packages. Which is unfortunate, because the parent process already did via: go build -toolexec=garble -a <main> The repeated dependency builds tripped the new os.O_EXCL check, as we would try to overwrite the same obfuscated Go files. Beyond being wasteful, this could again cause subtle filesystem races. To fix the problem, avoid passing flags like "-a" to nested go commands. Overall, we should likely be using safer ways to write to disk, be it via either atomic writes or locked files. However, for now, catching duplicate writes is a big step. I have left a self-assigned TODO for further improvements. CI on the pull request found a failure on test-gotip. The failure reproduces on master, so it seems to be related to gotip, and not a regression introduced by this change. For now, disable test-gotip until we can investigate.
3 years ago
t.Fatalf("filterForwardBuildFlags(%q) mismatch (-want +got):\n%s", test.flags, diff)
}
})
}
}
// TestFlagValue is a table-driven test for flagValue. Each case looks up
// flagName in a list of command-line flags and compares the returned string
// with the expected value.
//
// The table covers a value given as a separate argument ("-buildid bar"),
// a value joined with "=" ("-buildid=bar"), values that themselves start
// with a dash, and three inputs for which the empty string is expected:
// the flag being absent, the flag being the last argument with nothing
// after it, and the flag being followed by "=" and no value.
func TestFlagValue(t *testing.T) {
	t.Parallel()
	tests := []struct {
		name     string   // subtest name
		flags    []string // flag list to search
		flagName string   // flag to look up, including the leading dash
		want     string   // expected result of flagValue
	}{
		{"StrSpace", []string{"-buildid", "bar"}, "-buildid", "bar"},
		{"StrSpaceDash", []string{"-buildid", "-bar"}, "-buildid", "-bar"},
		{"StrEqual", []string{"-buildid=bar"}, "-buildid", "bar"},
		{"StrEqualDash", []string{"-buildid=-bar"}, "-buildid", "-bar"},
		{"StrMissing", []string{"-foo"}, "-buildid", ""},
		{"StrNotFollowed", []string{"-buildid"}, "-buildid", ""},
		{"StrEmpty", []string{"-buildid="}, "-buildid", ""},
	}
	for _, test := range tests {
		// Parallel subtests only resume once this function has returned,
		// i.e. after the loop has finished. On toolchains before Go 1.22
		// the range variable is shared by all iterations, so without a
		// per-iteration copy every subtest would see the last case.
		// The copy is a harmless no-op on Go 1.22 and later.
		test := test
		t.Run(test.name, func(t *testing.T) {
			t.Parallel()

			got := flagValue(test.flags, test.flagName)
			if got != test.want {
				t.Fatalf("flagValue(%q, %q) got %q, want %q",
					test.flags, test.flagName, got, test.want)
			}
		})
	}
}