You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
garble/hash.go

416 lines
14 KiB
Go

// Copyright (c) 2019, The Garble Authors.
// See LICENSE for licensing information.
package main
import (
"bytes"
"crypto/sha256"
"encoding/base64"
"encoding/binary"
"fmt"
"go/token"
"go/types"
"io"
"os/exec"
"strings"
"mvdan.cc/garble/internal/literals"
)
const buildIDSeparator = "/"
// splitActionID returns the action ID half of a build ID, the first hash.
func splitActionID(buildID string) string {
return buildID[:strings.Index(buildID, buildIDSeparator)]
}
// splitContentID returns the content ID half of a build ID, the last hash.
func splitContentID(buildID string) string {
return buildID[strings.LastIndex(buildID, buildIDSeparator)+1:]
}
// buildIDHashLength is the number of bytes each build ID hash takes,
// such as an action ID or a content ID.
const buildIDHashLength = 15
// decodeBuildIDHash decodes a build ID hash in base64, just like cmd/go does.
func decodeBuildIDHash(str string) []byte {
h, err := base64.RawURLEncoding.DecodeString(str)
if err != nil {
panic(fmt.Sprintf("invalid hash %q: %v", str, err))
}
if len(h) != buildIDHashLength {
panic(fmt.Sprintf("decodeBuildIDHash expects to result in a hash of length %d, got %d", buildIDHashLength, len(h)))
}
return h
}
// encodeBuildIDHash encodes a build ID hash in base64, just like cmd/go does.
func encodeBuildIDHash(h [sha256.Size]byte) string {
return base64.RawURLEncoding.EncodeToString(h[:buildIDHashLength])
}
func alterToolVersion(tool string, args []string) error {
cmd := exec.Command(args[0], args[1:]...)
out, err := cmd.Output()
if err != nil {
if err, _ := err.(*exec.ExitError); err != nil {
return fmt.Errorf("%v: %s", err, err.Stderr)
}
return err
}
line := string(bytes.TrimSpace(out)) // no trailing newline
f := strings.Fields(line)
if len(f) < 3 || f[0] != tool || f[1] != "version" || f[2] == "devel" && !strings.HasPrefix(f[len(f)-1], "buildID=") {
return fmt.Errorf("%s -V=full: unexpected output:\n\t%s", args[0], line)
}
var toolID []byte
if f[2] == "devel" {
// On the development branch, use the content ID part of the build ID.
toolID = decodeBuildIDHash(splitContentID(f[len(f)-1]))
} else {
// For a release, the output is like: "compile version go1.9.1 X:framepointer".
// Use the whole line, as we can assume it's unique.
toolID = []byte(line)
}
contentID := addGarbleToHash(toolID)
// The part of the build ID that matters is the last, since it's the
// "content ID" which is used to work out whether there is a need to redo
// the action (build) or not. Since cmd/go parses the last word in the
// output as "buildID=...", we simply add "+garble buildID=_/_/_/${hash}".
// The slashes let us imitate a full binary build ID, but we assume that
// the other hashes such as the action ID are not necessary, since the
// only reader here is cmd/go and it only consumes the content ID.
fmt.Printf("%s +garble buildID=_/_/_/%s\n", line, encodeBuildIDHash(contentID))
return nil
}
var (
hasher = sha256.New()
sumBuffer [sha256.Size]byte
)
// addGarbleToHash takes some arbitrary input bytes,
// typically a hash such as an action ID or a content ID,
// and returns a new hash which also contains garble's own deterministic inputs.
//
// This includes garble's own version, obtained via its own binary's content ID,
// as well as any other options which affect a build, such as GOGARBLE and -tiny.
func addGarbleToHash(inputHash []byte) [sha256.Size]byte {
// Join the two content IDs together into a single base64-encoded sha256
// sum. This includes the original tool's content ID, and garble's own
// content ID.
hasher.Reset()
hasher.Write(inputHash)
if len(sharedCache.BinaryContentID) == 0 {
panic("missing binary content ID")
}
hasher.Write(sharedCache.BinaryContentID)
// We also need to add the selected options to the full version string,
// because all of them result in different output. We use spaces to
// separate the env vars and flags, to reduce the chances of collisions.
fmt.Fprintf(hasher, " GOGARBLE=%s", sharedCache.GOGARBLE)
appendFlags(hasher, true)
// addGarbleToHash returns the sum buffer, so we need a new copy.
// Otherwise the next use of the global sumBuffer would conflict.
var sumBuffer [sha256.Size]byte
hasher.Sum(sumBuffer[:0])
return sumBuffer
}
// appendFlags writes garble's own flags to w in string form.
// Errors are ignored, as w is always a buffer or hasher.
// If forBuildHash is set, only the flags affecting a build are written.
func appendFlags(w io.Writer, forBuildHash bool) {
if flagLiterals {
io.WriteString(w, " -literals")
}
if flagTiny {
io.WriteString(w, " -tiny")
}
if flagDebug && !forBuildHash {
// -debug doesn't affect the build result at all,
// so don't give it separate entries in the build cache.
// If the user really wants to see debug info for already built deps,
// they can use "go clean cache" or the "-a" build flag to rebuild.
io.WriteString(w, " -debug")
}
if flagDebugDir != "" && !forBuildHash {
// -debugdir is a bit special.
//
// When passing down flags via -toolexec,
// we do want the actual flag value to be kept.
//
// For build hashes, we can skip the flag entirely,
// as it doesn't affect obfuscation at all.
//
// TODO: in the future, we could avoid using the -a build flag
// by using "-debugdir=yes" here, and caching the obfuscated source.
// Incremental builds would recover the cached source
// to repopulate the output directory if it was removed.
io.WriteString(w, " -debugdir=")
io.WriteString(w, flagDebugDir)
}
if flagSeed.present() {
io.WriteString(w, " -seed=")
io.WriteString(w, flagSeed.String())
}
if flagControlFlow && forBuildHash {
io.WriteString(w, " -ctrlflow")
}
if literals.TestObfuscator != "" && forBuildHash {
io.WriteString(w, literals.TestObfuscator)
}
}
func buildidOf(path string) (string, error) {
cmd := exec.Command("go", "tool", "buildid", path)
out, err := cmd.Output()
if err != nil {
if err, _ := err.(*exec.ExitError); err != nil {
return "", fmt.Errorf("%v: %s", err, err.Stderr)
}
return "", err
}
return string(out), nil
}
var (
// Hashed names are base64-encoded.
// Go names can only be letters, numbers, and underscores.
// This means we can use base64's URL encoding, minus '-',
// which is later replaced with a duplicate 'a'.
// Such a lossy encoding is fine, since we never decode hashes.
// We don't need padding either, as we take a short prefix anyway.
nameBase64 = base64.URLEncoding.WithPadding(base64.NoPadding)
b64NameBuffer [12]byte // nameBase64.EncodedLen(neededSumBytes) = 12
)
// These funcs mimic the unicode package API, but byte-based since we know
// base64 is all ASCII.
func isDigit(b byte) bool { return '0' <= b && b <= '9' }
func isLower(b byte) bool { return 'a' <= b && b <= 'z' }
func isUpper(b byte) bool { return 'A' <= b && b <= 'Z' }
func toLower(b byte) byte { return b + ('a' - 'A') }
func toUpper(b byte) byte { return b - ('a' - 'A') }
func runtimeHashWithCustomSalt(salt []byte) uint32 {
hasher.Reset()
if !flagSeed.present() {
hasher.Write(sharedCache.ListedPackages["runtime"].GarbleActionID[:])
} else {
hasher.Write(flagSeed.bytes)
}
hasher.Write(salt)
sum := hasher.Sum(sumBuffer[:0])
return binary.LittleEndian.Uint32(sum)
}
// magicValue returns random magic value based
// on user specified seed or the runtime package's GarbleActionID.
func magicValue() uint32 {
return runtimeHashWithCustomSalt([]byte("magic"))
}
// entryOffKey returns random entry offset key
// on user specified seed or the runtime package's GarbleActionID.
func entryOffKey() uint32 {
return runtimeHashWithCustomSalt([]byte("entryOffKey"))
}
func hashWithPackage(pkg *listedPackage, name string) string {
// If the user provided us with an obfuscation seed,
// we use that with the package import path directly..
// Otherwise, we use GarbleActionID as a fallback salt.
if !flagSeed.present() {
return hashWithCustomSalt(pkg.GarbleActionID[:], name)
}
// Use a separator at the end of ImportPath as a salt,
// to ensure that "pkgfoo.bar" and "pkg.foobar" don't both hash
// as the same string "pkgfoobar".
return hashWithCustomSalt([]byte(pkg.ImportPath+"|"), name)
}
// stripStructTags takes the bytes produced by [types.WriteType]
// and removes any struct tags in-place, such as rewriting
//
// struct{Foo int; Bar string "json:\"bar\""}
//
// into
//
// struct{Foo int; Bar string}
//
// Note that, unlike most Go source, WriteType uses double quotes for tags.
//
// Reusing WriteType does require a second pass over its output here,
// which we could save by implementing our own modified version of WriteType.
// However, that would be a significant amount of code to maintain.
func stripStructTags(p []byte) []byte {
i := 0
for i < len(p) {
b := p[i]
start := i - 1 // a struct tag is preceded by a space
i++
if b != '"' {
continue
}
// Find the closing double quote, skipping over escaped characters.
// Note that we should probably iterate over runes and not bytes,
// but this byte implementation is probably good enough in practice.
for {
b = p[i]
i++
if b == '\\' {
i++
} else if b == '"' {
break
}
}
end := i
// Remove the bytes between start and end,
// and reset i to start, since we just shortened p.
p = append(p[:start], p[end:]...)
i = start
}
return p
}
var typeIdentityBuf bytes.Buffer
// hashWithStruct is separate from hashWithPackage since Go
// allows converting between struct types across packages.
// Hashing struct field names differently between packages would break that.
//
// We hash field names with the identity struct type as a salt
// so that the same field name used in different struct types is obfuscated differently.
// Note that "identity" means omitting struct tags since conversions ignore them.
func hashWithStruct(strct *types.Struct, field *types.Var) string {
typeIdentityBuf.Reset()
types.WriteType(&typeIdentityBuf, strct, nil)
salt := stripStructTags(typeIdentityBuf.Bytes())
// If the user provided us with an obfuscation seed,
// we only use the identity struct type as a salt.
// Otherwise, we add garble's own inputs to the salt as a fallback.
if !flagSeed.present() {
withGarbleHash := addGarbleToHash(salt)
salt = withGarbleHash[:]
}
return hashWithCustomSalt(salt, field.Name())
}
// minHashLength and maxHashLength define the range for the number of base64
// characters to use for the final hashed name.
//
// minHashLength needs to be long enough to realistically avoid hash collisions,
// but maxHashLength should be short enough to not bloat binary sizes.
// The namespace for collisions is generally a single package, since
// that's where most hashed names are namespaced to.
//
// Using a "hash collision" formula, and taking a generous estimate of a
// package having 10k names, we get the following probabilities.
// Most packages will have far fewer names, but some packages are huge,
// especially generated ones.
//
// We also have slightly fewer bits in practice, since the base64
// charset has 'z' twice, and the first base64 char is coerced into a
// valid Go identifier. So we must be conservative.
// Remember that base64 stores 6 bits per encoded byte.
// The probability numbers are approximated.
//
// length (base64) | length (bits) | collision probability
// -------------------------------------------------------
// 4 24 ~95%
// 5 30 ~4%
// 6 36 ~0.07%
// 7 42 ~0.001%
// 8 48 ~0.00001%
//
// We want collisions to be practically impossible, so the hashed names end up
// with lengths evenly distributed between 6 and 12. Naively, this results in an
// average length of 9, which has a chance well below 1 in a million even when a
// package has thousands of obfuscated names.
//
// These numbers are also chosen to keep obfuscated binary sizes reasonable.
// For example, increasing the average length of 9 by 1 results in roughly a 1%
// increase in binary sizes.
const (
minHashLength = 6
maxHashLength = 12
// At most we'll need maxHashLength base64 characters,
// so 9 checksum bytes are enough for that purpose,
// which is nameBase64.DecodedLen(12) being rounded up.
neededSumBytes = 9
)
// hashWithCustomSalt returns a hashed version of name,
// including the provided salt as well as opts.Seed into the hash input.
//
// The result is always four bytes long. If the input was a valid identifier,
// the output remains equally exported or unexported. Note that this process is
// reproducible, but not reversible.
func hashWithCustomSalt(salt []byte, name string) string {
if len(salt) == 0 {
panic("hashWithCustomSalt: empty salt")
}
if name == "" {
panic("hashWithCustomSalt: empty name")
}
hasher.Reset()
hasher.Write(salt)
hasher.Write(flagSeed.bytes)
io.WriteString(hasher, name)
sum := hasher.Sum(sumBuffer[:0])
// The byte after neededSumBytes is never used as part of the name,
// but it is still deterministic and hard to predict,
// so it provides us with useful randomness between 0 and 255.
// We want the number to be between 0 and hashLenthRange-1 as well,
// so we use a remainder operation.
hashLengthRandomness := sum[neededSumBytes] % ((maxHashLength - minHashLength) + 1)
hashLength := minHashLength + hashLengthRandomness
nameBase64.Encode(b64NameBuffer[:], sum[:neededSumBytes])
b64Name := b64NameBuffer[:hashLength]
// Even if we are hashing a package path, which is not an identifier,
// we still want the result to be a valid identifier,
// since we'll use it as the package name too.
if isDigit(b64Name[0]) {
// Turn "3foo" into "Dfoo".
// Similar to toLower, since uppercase letters go after digits
// in the ASCII table.
b64Name[0] += 'A' - '0'
}
for i, b := range b64Name {
if b == '-' { // URL encoding uses dashes, which aren't valid
b64Name[i] = 'a'
}
}
// Valid identifiers should stay exported or unexported.
if token.IsIdentifier(name) {
if token.IsExported(name) {
if b64Name[0] == '_' {
// Turn "_foo" into "Zfoo".
b64Name[0] = 'Z'
} else if isLower(b64Name[0]) {
// Turn "afoo" into "Afoo".
b64Name[0] = toUpper(b64Name[0])
}
} else {
if isUpper(b64Name[0]) {
// Turn "Afoo" into "afoo".
b64Name[0] = toLower(b64Name[0])
}
}
}
return string(b64Name)
}