// Copyright 2020 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Package fuzz provides common fuzzing functionality for tests built with // "go test" and for programs that use fuzzing functionality in the testing // package. package fuzz import ( "bytes" "context" "crypto/sha256" "errors" "fmt" "internal/godebug" "io" "math/bits" "os" "path/filepath" "reflect" "runtime" "strings" "time" ) // CoordinateFuzzingOpts is a set of arguments for CoordinateFuzzing. // The zero value is valid for each field unless specified otherwise. type CoordinateFuzzingOpts struct { // Log is a writer for logging progress messages and warnings. // If nil, io.Discard will be used instead. Log io.Writer // Timeout is the amount of wall clock time to spend fuzzing after the corpus // has loaded. If zero, there will be no time limit. Timeout time.Duration // Limit is the number of random values to generate and test. If zero, // there will be no limit on the number of generated values. Limit int64 // MinimizeTimeout is the amount of wall clock time to spend minimizing // after discovering a crasher. If zero, there will be no time limit. If // MinimizeTimeout and MinimizeLimit are both zero, then minimization will // be disabled. MinimizeTimeout time.Duration // MinimizeLimit is the maximum number of calls to the fuzz function to be // made while minimizing after finding a crash. If zero, there will be no // limit. Calls to the fuzz function made when minimizing also count toward // Limit. If MinimizeTimeout and MinimizeLimit are both zero, then // minimization will be disabled. MinimizeLimit int64 // parallel is the number of worker processes to run in parallel. If zero, // CoordinateFuzzing will run GOMAXPROCS workers. Parallel int // Seed is a list of seed values added by the fuzz target with testing.F.Add // and in testdata. Seed []CorpusEntry // Types is the list of types which make up a corpus entry. // Types must be set and must match values in Seed. Types []reflect.Type // CorpusDir is a directory where files containing values that crash the // code being tested may be written. CorpusDir must be set. CorpusDir string // CacheDir is a directory containing additional "interesting" values. // The fuzzer may derive new values from these, and may write new values here. CacheDir string } // CoordinateFuzzing creates several worker processes and communicates with // them to test random inputs that could trigger crashes and expose bugs. // The worker processes run the same binary in the same directory with the // same environment variables as the coordinator process. Workers also run // with the same arguments as the coordinator, except with the -test.fuzzworker // flag prepended to the argument list. // // If a crash occurs, the function will return an error containing information // about the crash, which can be reported to the user. func CoordinateFuzzing(ctx context.Context, opts CoordinateFuzzingOpts) (err error) { if err := ctx.Err(); err != nil { return err } if opts.Log == nil { opts.Log = io.Discard } if opts.Parallel == 0 { opts.Parallel = runtime.GOMAXPROCS(0) } if opts.Limit > 0 && int64(opts.Parallel) > opts.Limit { // Don't start more workers than we need. opts.Parallel = int(opts.Limit) } c, err := newCoordinator(opts) if err != nil { return err } if opts.Timeout > 0 { var cancel func() ctx, cancel = context.WithTimeout(ctx, opts.Timeout) defer cancel() } // fuzzCtx is used to stop workers, for example, after finding a crasher. fuzzCtx, cancelWorkers := context.WithCancel(ctx) defer cancelWorkers() doneC := ctx.Done() // stop is called when a worker encounters a fatal error. var fuzzErr error stopping := false stop := func(err error) { if err == fuzzCtx.Err() || isInterruptError(err) { // Suppress cancellation errors and terminations due to SIGINT. // The messages are not helpful since either the user triggered the error // (with ^C) or another more helpful message will be printed (a crasher). err = nil } if err != nil && (fuzzErr == nil || fuzzErr == ctx.Err()) { fuzzErr = err } if stopping { return } stopping = true cancelWorkers() doneC = nil } // Ensure that any crash we find is written to the corpus, even if an error // or interruption occurs while minimizing it. crashWritten := false defer func() { if c.crashMinimizing == nil || crashWritten { return } werr := writeToCorpus(&c.crashMinimizing.entry, opts.CorpusDir) if werr != nil { err = fmt.Errorf("%w\n%v", err, werr) return } if err == nil { err = &crashError{ path: c.crashMinimizing.entry.Path, err: errors.New(c.crashMinimizing.crasherMsg), } } }() // Start workers. // TODO(jayconrod): do we want to support fuzzing different binaries? dir := "" // same as self binPath := os.Args[0] args := append([]string{"-test.fuzzworker"}, os.Args[1:]...) env := os.Environ() // same as self errC := make(chan error) workers := make([]*worker, opts.Parallel) for i := range workers { var err error workers[i], err = newWorker(c, dir, binPath, args, env) if err != nil { return err } } for i := range workers { w := workers[i] go func() { err := w.coordinate(fuzzCtx) if fuzzCtx.Err() != nil || isInterruptError(err) { err = nil } cleanErr := w.cleanup() if err == nil { err = cleanErr } errC <- err }() } // Main event loop. // Do not return until all workers have terminated. We avoid a deadlock by // receiving messages from workers even after ctx is cancelled. activeWorkers := len(workers) statTicker := time.NewTicker(3 * time.Second) defer statTicker.Stop() defer c.logStats() c.logStats() for { var inputC chan fuzzInput input, ok := c.peekInput() if ok && c.crashMinimizing == nil && !stopping { inputC = c.inputC } var minimizeC chan fuzzMinimizeInput minimizeInput, ok := c.peekMinimizeInput() if ok && !stopping { minimizeC = c.minimizeC } select { case <-doneC: // Interrupted, cancelled, or timed out. // stop sets doneC to nil so we don't busy wait here. stop(ctx.Err()) case err := <-errC: // A worker terminated, possibly after encountering a fatal error. stop(err) activeWorkers-- if activeWorkers == 0 { return fuzzErr } case result := <-c.resultC: // Received response from worker. if stopping { break } c.updateStats(result) if result.crasherMsg != "" { if c.warmupRun() && result.entry.IsSeed { target := filepath.Base(c.opts.CorpusDir) fmt.Fprintf(c.opts.Log, "failure while testing seed corpus entry: %s/%s\n", target, testName(result.entry.Parent)) stop(errors.New(result.crasherMsg)) break } if c.canMinimize() && result.canMinimize { if c.crashMinimizing != nil { // This crash is not minimized, and another crash is being minimized. // Ignore this one and wait for the other one to finish. break } // Found a crasher but haven't yet attempted to minimize it. // Send it back to a worker for minimization. Disable inputC so // other workers don't continue fuzzing. c.crashMinimizing = &result fmt.Fprintf(c.opts.Log, "fuzz: minimizing %d-byte failing input file\n", len(result.entry.Data)) c.queueForMinimization(result, nil) } else if !crashWritten { // Found a crasher that's either minimized or not minimizable. // Write to corpus and stop. err := writeToCorpus(&result.entry, opts.CorpusDir) if err == nil { crashWritten = true err = &crashError{ path: result.entry.Path, err: errors.New(result.crasherMsg), } } if shouldPrintDebugInfo() { fmt.Fprintf( c.opts.Log, "DEBUG new crasher, elapsed: %s, id: %s, parent: %s, gen: %d, size: %d, exec time: %s\n", c.elapsed(), result.entry.Path, result.entry.Parent, result.entry.Generation, len(result.entry.Data), result.entryDuration, ) } stop(err) } } else if result.coverageData != nil { if c.warmupRun() { if shouldPrintDebugInfo() { fmt.Fprintf( c.opts.Log, "DEBUG processed an initial input, elapsed: %s, id: %s, new bits: %d, size: %d, exec time: %s\n", c.elapsed(), result.entry.Parent, countBits(diffCoverage(c.coverageMask, result.coverageData)), len(result.entry.Data), result.entryDuration, ) } c.updateCoverage(result.coverageData) c.warmupInputLeft-- if c.warmupInputLeft == 0 { fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) if shouldPrintDebugInfo() { fmt.Fprintf( c.opts.Log, "DEBUG finished processing input corpus, elapsed: %s, entries: %d, initial coverage bits: %d\n", c.elapsed(), len(c.corpus.entries), countBits(c.coverageMask), ) } } } else if keepCoverage := diffCoverage(c.coverageMask, result.coverageData); keepCoverage != nil { // Found a value that expanded coverage. // It's not a crasher, but we may want to add it to the on-disk // corpus and prioritize it for future fuzzing. // TODO(jayconrod, katiehockman): Prioritize fuzzing these // values which expanded coverage, perhaps based on the // number of new edges that this result expanded. // TODO(jayconrod, katiehockman): Don't write a value that's already // in the corpus. if c.canMinimize() && result.canMinimize && c.crashMinimizing == nil { // Send back to workers to find a smaller value that preserves // at least one new coverage bit. c.queueForMinimization(result, keepCoverage) } else { // Update the coordinator's coverage mask and save the value. inputSize := len(result.entry.Data) entryNew, err := c.addCorpusEntries(true, result.entry) if err != nil { stop(err) break } if !entryNew { continue } c.updateCoverage(keepCoverage) c.inputQueue.enqueue(result.entry) c.interestingCount++ if shouldPrintDebugInfo() { fmt.Fprintf( c.opts.Log, "DEBUG new interesting input, elapsed: %s, id: %s, parent: %s, gen: %d, new bits: %d, total bits: %d, size: %d, exec time: %s\n", c.elapsed(), result.entry.Path, result.entry.Parent, result.entry.Generation, countBits(keepCoverage), countBits(c.coverageMask), inputSize, result.entryDuration, ) } } } else { if shouldPrintDebugInfo() { fmt.Fprintf( c.opts.Log, "DEBUG worker reported interesting input that doesn't expand coverage, elapsed: %s, id: %s, parent: %s, canMinimize: %t\n", c.elapsed(), result.entry.Path, result.entry.Parent, result.canMinimize, ) } } } else if c.warmupRun() { // No error or coverage data was reported for this input during // warmup, so continue processing results. c.warmupInputLeft-- if c.warmupInputLeft == 0 { fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed, now fuzzing with %d workers\n", c.elapsed(), c.warmupInputCount, c.warmupInputCount, c.opts.Parallel) if shouldPrintDebugInfo() { fmt.Fprintf( c.opts.Log, "DEBUG finished testing-only phase, elapsed: %s, entries: %d\n", time.Since(c.startTime), len(c.corpus.entries), ) } } } // Once the result has been processed, stop the worker if we // have reached the fuzzing limit. if c.opts.Limit > 0 && c.count >= c.opts.Limit { stop(nil) } case inputC <- input: // Sent the next input to a worker. c.sentInput(input) case minimizeC <- minimizeInput: // Sent the next input for minimization to a worker. c.sentMinimizeInput(minimizeInput) case <-statTicker.C: c.logStats() } } // TODO(jayconrod,katiehockman): if a crasher can't be written to the corpus, // write to the cache instead. } // crashError wraps a crasher written to the seed corpus. It saves the name // of the file where the input causing the crasher was saved. The testing // framework uses this to report a command to re-run that specific input. type crashError struct { path string err error } func (e *crashError) Error() string { return e.err.Error() } func (e *crashError) Unwrap() error { return e.err } func (e *crashError) CrashPath() string { return e.path } type corpus struct { entries []CorpusEntry hashes map[[sha256.Size]byte]bool } // addCorpusEntries adds entries to the corpus, and optionally writes the entries // to the cache directory. If an entry is already in the corpus it is skipped. If // all of the entries are unique, addCorpusEntries returns true and a nil error, // if at least one of the entries was a duplicate, it returns false and a nil error. func (c *coordinator) addCorpusEntries(addToCache bool, entries ...CorpusEntry) (bool, error) { noDupes := true for _, e := range entries { data, err := corpusEntryData(e) if err != nil { return false, err } h := sha256.Sum256(data) if c.corpus.hashes[h] { noDupes = false continue } if addToCache { if err := writeToCorpus(&e, c.opts.CacheDir); err != nil { return false, err } // For entries written to disk, we don't hold onto the bytes, // since the corpus would consume a significant amount of // memory. e.Data = nil } c.corpus.hashes[h] = true c.corpus.entries = append(c.corpus.entries, e) } return noDupes, nil } // CorpusEntry represents an individual input for fuzzing. // // We must use an equivalent type in the testing and testing/internal/testdeps // packages, but testing can't import this package directly, and we don't want // to export this type from testing. Instead, we use the same struct type and // use a type alias (not a defined type) for convenience. type CorpusEntry = struct { Parent string // Path is the path of the corpus file, if the entry was loaded from disk. // For other entries, including seed values provided by f.Add, Path is the // name of the test, e.g. seed#0 or its hash. Path string // Data is the raw input data. Data should only be populated for seed // values. For on-disk corpus files, Data will be nil, as it will be loaded // from disk using Path. Data []byte // Values is the unmarshaled values from a corpus file. Values []any Generation int // IsSeed indicates whether this entry is part of the seed corpus. IsSeed bool } // corpusEntryData returns the raw input bytes, either from the data struct // field, or from disk. func corpusEntryData(ce CorpusEntry) ([]byte, error) { if ce.Data != nil { return ce.Data, nil } return os.ReadFile(ce.Path) } type fuzzInput struct { // entry is the value to test initially. The worker will randomly mutate // values from this starting point. entry CorpusEntry // timeout is the time to spend fuzzing variations of this input, // not including starting or cleaning up. timeout time.Duration // limit is the maximum number of calls to the fuzz function the worker may // make. The worker may make fewer calls, for example, if it finds an // error early. If limit is zero, there is no limit on calls to the // fuzz function. limit int64 // warmup indicates whether this is a warmup input before fuzzing begins. If // true, the input should not be fuzzed. warmup bool // coverageData reflects the coordinator's current coverageMask. coverageData []byte } type fuzzResult struct { // entry is an interesting value or a crasher. entry CorpusEntry // crasherMsg is an error message from a crash. It's "" if no crash was found. crasherMsg string // canMinimize is true if the worker should attempt to minimize this result. // It may be false because an attempt has already been made. canMinimize bool // coverageData is set if the worker found new coverage. coverageData []byte // limit is the number of values the coordinator asked the worker // to test. 0 if there was no limit. limit int64 // count is the number of values the worker actually tested. count int64 // totalDuration is the time the worker spent testing inputs. totalDuration time.Duration // entryDuration is the time the worker spent execution an interesting result entryDuration time.Duration } type fuzzMinimizeInput struct { // entry is an interesting value or crasher to minimize. entry CorpusEntry // crasherMsg is an error message from a crash. It's "" if no crash was found. // If set, the worker will attempt to find a smaller input that also produces // an error, though not necessarily the same error. crasherMsg string // limit is the maximum number of calls to the fuzz function the worker may // make. The worker may make fewer calls, for example, if it can't reproduce // an error. If limit is zero, there is no limit on calls to the fuzz function. limit int64 // timeout is the time to spend minimizing this input. // A zero timeout means no limit. timeout time.Duration // keepCoverage is a set of coverage bits that entry found that were not in // the coordinator's combined set. When minimizing, the worker should find an // input that preserves at least one of these bits. keepCoverage is nil for // crashing inputs. keepCoverage []byte } // coordinator holds channels that workers can use to communicate with // the coordinator. type coordinator struct { opts CoordinateFuzzingOpts // startTime is the time we started the workers after loading the corpus. // Used for logging. startTime time.Time // inputC is sent values to fuzz by the coordinator. Any worker may receive // values from this channel. Workers send results to resultC. inputC chan fuzzInput // minimizeC is sent values to minimize by the coordinator. Any worker may // receive values from this channel. Workers send results to resultC. minimizeC chan fuzzMinimizeInput // resultC is sent results of fuzzing by workers. The coordinator // receives these. Multiple types of messages are allowed. resultC chan fuzzResult // count is the number of values fuzzed so far. count int64 // countLastLog is the number of values fuzzed when the output was last // logged. countLastLog int64 // timeLastLog is the time at which the output was last logged. timeLastLog time.Time // interestingCount is the number of unique interesting values which have // been found this execution. interestingCount int // warmupInputCount is the count of all entries in the corpus which will // need to be received from workers to run once during warmup, but not fuzz. // This could be for coverage data, or only for the purposes of verifying // that the seed corpus doesn't have any crashers. See warmupRun. warmupInputCount int // warmupInputLeft is the number of entries in the corpus which still need // to be received from workers to run once during warmup, but not fuzz. // See warmupInputLeft. warmupInputLeft int // duration is the time spent fuzzing inside workers, not counting time // starting up or tearing down. duration time.Duration // countWaiting is the number of fuzzing executions the coordinator is // waiting on workers to complete. countWaiting int64 // corpus is a set of interesting values, including the seed corpus and // generated values that workers reported as interesting. corpus corpus // minimizationAllowed is true if one or more of the types of fuzz // function's parameters can be minimized. minimizationAllowed bool // inputQueue is a queue of inputs that workers should try fuzzing. This is // initially populated from the seed corpus and cached inputs. More inputs // may be added as new coverage is discovered. inputQueue queue // minimizeQueue is a queue of inputs that caused errors or exposed new // coverage. Workers should attempt to find smaller inputs that do the // same thing. minimizeQueue queue // crashMinimizing is the crash that is currently being minimized. crashMinimizing *fuzzResult // coverageMask aggregates coverage that was found for all inputs in the // corpus. Each byte represents a single basic execution block. Each set bit // within the byte indicates that an input has triggered that block at least // 1 << n times, where n is the position of the bit in the byte. For example, a // value of 12 indicates that separate inputs have triggered this block // between 4-7 times and 8-15 times. coverageMask []byte } func newCoordinator(opts CoordinateFuzzingOpts) (*coordinator, error) { // Make sure all of the seed corpus has marshalled data. for i := range opts.Seed { if opts.Seed[i].Data == nil && opts.Seed[i].Values != nil { opts.Seed[i].Data = marshalCorpusFile(opts.Seed[i].Values...) } } c := &coordinator{ opts: opts, startTime: time.Now(), inputC: make(chan fuzzInput), minimizeC: make(chan fuzzMinimizeInput), resultC: make(chan fuzzResult), timeLastLog: time.Now(), corpus: corpus{hashes: make(map[[sha256.Size]byte]bool)}, } if err := c.readCache(); err != nil { return nil, err } if opts.MinimizeLimit > 0 || opts.MinimizeTimeout > 0 { for _, t := range opts.Types { if isMinimizable(t) { c.minimizationAllowed = true break } } } covSize := len(coverage()) if covSize == 0 { fmt.Fprintf(c.opts.Log, "warning: the test binary was not built with coverage instrumentation, so fuzzing will run without coverage guidance and may be inefficient\n") // Even though a coverage-only run won't occur, we should still run all // of the seed corpus to make sure there are no existing failures before // we start fuzzing. c.warmupInputCount = len(c.opts.Seed) for _, e := range c.opts.Seed { c.inputQueue.enqueue(e) } } else { c.warmupInputCount = len(c.corpus.entries) for _, e := range c.corpus.entries { c.inputQueue.enqueue(e) } // Set c.coverageMask to a clean []byte full of zeros. c.coverageMask = make([]byte, covSize) } c.warmupInputLeft = c.warmupInputCount if len(c.corpus.entries) == 0 { fmt.Fprintf(c.opts.Log, "warning: starting with empty corpus\n") var vals []any for _, t := range opts.Types { vals = append(vals, zeroValue(t)) } data := marshalCorpusFile(vals...) h := sha256.Sum256(data) name := fmt.Sprintf("%x", h[:4]) c.addCorpusEntries(false, CorpusEntry{Path: name, Data: data}) } return c, nil } func (c *coordinator) updateStats(result fuzzResult) { c.count += result.count c.countWaiting -= result.limit c.duration += result.totalDuration } func (c *coordinator) logStats() { now := time.Now() if c.warmupRun() { runSoFar := c.warmupInputCount - c.warmupInputLeft if coverageEnabled { fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, gathering baseline coverage: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) } else { fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, testing seed corpus: %d/%d completed\n", c.elapsed(), runSoFar, c.warmupInputCount) } } else if c.crashMinimizing != nil { fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, minimizing\n", c.elapsed()) } else { rate := float64(c.count-c.countLastLog) / now.Sub(c.timeLastLog).Seconds() if coverageEnabled { total := c.warmupInputCount + c.interestingCount fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec), new interesting: %d (total: %d)\n", c.elapsed(), c.count, rate, c.interestingCount, total) } else { fmt.Fprintf(c.opts.Log, "fuzz: elapsed: %s, execs: %d (%.0f/sec)\n", c.elapsed(), c.count, rate) } } c.countLastLog = c.count c.timeLastLog = now } // peekInput returns the next value that should be sent to workers. // If the number of executions is limited, the returned value includes // a limit for one worker. If there are no executions left, peekInput returns // a zero value and false. // // peekInput doesn't actually remove the input from the queue. The caller // must call sentInput after sending the input. // // If the input queue is empty and the coverage/testing-only run has completed, // queue refills it from the corpus. func (c *coordinator) peekInput() (fuzzInput, bool) { if c.opts.Limit > 0 && c.count+c.countWaiting >= c.opts.Limit { // Already making the maximum number of calls to the fuzz function. // Don't send more inputs right now. return fuzzInput{}, false } if c.inputQueue.len == 0 { if c.warmupRun() { // Wait for coverage/testing-only run to finish before sending more // inputs. return fuzzInput{}, false } c.refillInputQueue() } entry, ok := c.inputQueue.peek() if !ok { panic("input queue empty after refill") } input := fuzzInput{ entry: entry.(CorpusEntry), timeout: workerFuzzDuration, warmup: c.warmupRun(), } if c.coverageMask != nil { input.coverageData = bytes.Clone(c.coverageMask) } if input.warmup { // No fuzzing will occur, but it should count toward the limit set by // -fuzztime. input.limit = 1 return input, true } if c.opts.Limit > 0 { input.limit = c.opts.Limit / int64(c.opts.Parallel) if c.opts.Limit%int64(c.opts.Parallel) > 0 { input.limit++ } remaining := c.opts.Limit - c.count - c.countWaiting if input.limit > remaining { input.limit = remaining } } return input, true } // sentInput updates internal counters after an input is sent to c.inputC. func (c *coordinator) sentInput(input fuzzInput) { c.inputQueue.dequeue() c.countWaiting += input.limit } // refillInputQueue refills the input queue from the corpus after it becomes // empty. func (c *coordinator) refillInputQueue() { for _, e := range c.corpus.entries { c.inputQueue.enqueue(e) } } // queueForMinimization creates a fuzzMinimizeInput from result and adds it // to the minimization queue to be sent to workers. func (c *coordinator) queueForMinimization(result fuzzResult, keepCoverage []byte) { if result.crasherMsg != "" { c.minimizeQueue.clear() } input := fuzzMinimizeInput{ entry: result.entry, crasherMsg: result.crasherMsg, keepCoverage: keepCoverage, } c.minimizeQueue.enqueue(input) } // peekMinimizeInput returns the next input that should be sent to workers for // minimization. func (c *coordinator) peekMinimizeInput() (fuzzMinimizeInput, bool) { if !c.canMinimize() { // Already making the maximum number of calls to the fuzz function. // Don't send more inputs right now. return fuzzMinimizeInput{}, false } v, ok := c.minimizeQueue.peek() if !ok { return fuzzMinimizeInput{}, false } input := v.(fuzzMinimizeInput) if c.opts.MinimizeTimeout > 0 { input.timeout = c.opts.MinimizeTimeout } if c.opts.MinimizeLimit > 0 { input.limit = c.opts.MinimizeLimit } else if c.opts.Limit > 0 { if input.crasherMsg != "" { input.limit = c.opts.Limit } else { input.limit = c.opts.Limit / int64(c.opts.Parallel) if c.opts.Limit%int64(c.opts.Parallel) > 0 { input.limit++ } } } if c.opts.Limit > 0 { remaining := c.opts.Limit - c.count - c.countWaiting if input.limit > remaining { input.limit = remaining } } return input, true } // sentMinimizeInput removes an input from the minimization queue after it's // sent to minimizeC. func (c *coordinator) sentMinimizeInput(input fuzzMinimizeInput) { c.minimizeQueue.dequeue() c.countWaiting += input.limit } // warmupRun returns true while the coordinator is running inputs without // mutating them as a warmup before fuzzing. This could be to gather baseline // coverage data for entries in the corpus, or to test all of the seed corpus // for errors before fuzzing begins. // // The coordinator doesn't store coverage data in the cache with each input // because that data would be invalid when counter offsets in the test binary // change. // // When gathering coverage, the coordinator sends each entry to a worker to // gather coverage for that entry only, without fuzzing or minimizing. This // phase ends when all workers have finished, and the coordinator has a combined // coverage map. func (c *coordinator) warmupRun() bool { return c.warmupInputLeft > 0 } // updateCoverage sets bits in c.coverageMask that are set in newCoverage. // updateCoverage returns the number of newly set bits. See the comment on // coverageMask for the format. func (c *coordinator) updateCoverage(newCoverage []byte) int { if len(newCoverage) != len(c.coverageMask) { panic(fmt.Sprintf("number of coverage counters changed at runtime: %d, expected %d", len(newCoverage), len(c.coverageMask))) } newBitCount := 0 for i := range newCoverage { diff := newCoverage[i] &^ c.coverageMask[i] newBitCount += bits.OnesCount8(diff) c.coverageMask[i] |= newCoverage[i] } return newBitCount } // canMinimize returns whether the coordinator should attempt to find smaller // inputs that reproduce a crash or new coverage. func (c *coordinator) canMinimize() bool { return c.minimizationAllowed && (c.opts.Limit == 0 || c.count+c.countWaiting < c.opts.Limit) } func (c *coordinator) elapsed() time.Duration { return time.Since(c.startTime).Round(1 * time.Second) } // readCache creates a combined corpus from seed values and values in the cache // (in GOCACHE/fuzz). // // TODO(fuzzing): need a mechanism that can remove values that // aren't useful anymore, for example, because they have the wrong type. func (c *coordinator) readCache() error { if _, err := c.addCorpusEntries(false, c.opts.Seed...); err != nil { return err } entries, err := ReadCorpus(c.opts.CacheDir, c.opts.Types) if err != nil { if _, ok := err.(*MalformedCorpusError); !ok { // It's okay if some files in the cache directory are malformed and // are not included in the corpus, but fail if it's an I/O error. return err } // TODO(jayconrod,katiehockman): consider printing some kind of warning // indicating the number of files which were skipped because they are // malformed. } if _, err := c.addCorpusEntries(false, entries...); err != nil { return err } return nil } // MalformedCorpusError is an error found while reading the corpus from the // filesystem. All of the errors are stored in the errs list. The testing // framework uses this to report malformed files in testdata. type MalformedCorpusError struct { errs []error } func (e *MalformedCorpusError) Error() string { var msgs []string for _, s := range e.errs { msgs = append(msgs, s.Error()) } return strings.Join(msgs, "\n") } // ReadCorpus reads the corpus from the provided dir. The returned corpus // entries are guaranteed to match the given types. Any malformed files will // be saved in a MalformedCorpusError and returned, along with the most recent // error. func ReadCorpus(dir string, types []reflect.Type) ([]CorpusEntry, error) { files, err := os.ReadDir(dir) if os.IsNotExist(err) { return nil, nil // No corpus to read } else if err != nil { return nil, fmt.Errorf("reading seed corpus from testdata: %v", err) } var corpus []CorpusEntry var errs []error for _, file := range files { // TODO(jayconrod,katiehockman): determine when a file is a fuzzing input // based on its name. We should only read files created by writeToCorpus. // If we read ALL files, we won't be able to change the file format by // changing the extension. We also won't be able to add files like // README.txt explaining why the directory exists. if file.IsDir() { continue } filename := filepath.Join(dir, file.Name()) data, err := os.ReadFile(filename) if err != nil { return nil, fmt.Errorf("failed to read corpus file: %v", err) } var vals []any vals, err = readCorpusData(data, types) if err != nil { errs = append(errs, fmt.Errorf("%q: %v", filename, err)) continue } corpus = append(corpus, CorpusEntry{Path: filename, Values: vals}) } if len(errs) > 0 { return corpus, &MalformedCorpusError{errs: errs} } return corpus, nil } func readCorpusData(data []byte, types []reflect.Type) ([]any, error) { vals, err := unmarshalCorpusFile(data) if err != nil { return nil, fmt.Errorf("unmarshal: %v", err) } if err = CheckCorpus(vals, types); err != nil { return nil, err } return vals, nil } // CheckCorpus verifies that the types in vals match the expected types // provided. func CheckCorpus(vals []any, types []reflect.Type) error { if len(vals) != len(types) { return fmt.Errorf("wrong number of values in corpus entry: %d, want %d", len(vals), len(types)) } valsT := make([]reflect.Type, len(vals)) for valsI, v := range vals { valsT[valsI] = reflect.TypeOf(v) } for i := range types { if valsT[i] != types[i] { return fmt.Errorf("mismatched types in corpus entry: %v, want %v", valsT, types) } } return nil } // writeToCorpus atomically writes the given bytes to a new file in testdata. If // the directory does not exist, it will create one. If the file already exists, // writeToCorpus will not rewrite it. writeToCorpus sets entry.Path to the new // file that was just written or an error if it failed. func writeToCorpus(entry *CorpusEntry, dir string) (err error) { sum := fmt.Sprintf("%x", sha256.Sum256(entry.Data))[:16] entry.Path = filepath.Join(dir, sum) if err := os.MkdirAll(dir, 0777); err != nil { return err } if err := os.WriteFile(entry.Path, entry.Data, 0666); err != nil { os.Remove(entry.Path) // remove partially written file return err } return nil } func testName(path string) string { return filepath.Base(path) } func zeroValue(t reflect.Type) any { for _, v := range zeroVals { if reflect.TypeOf(v) == t { return v } } panic(fmt.Sprintf("unsupported type: %v", t)) } var zeroVals []any = []any{ []byte(""), string(""), false, byte(0), rune(0), float32(0), float64(0), int(0), int8(0), int16(0), int32(0), int64(0), uint(0), uint8(0), uint16(0), uint32(0), uint64(0), } var debugInfo = godebug.New("fuzzdebug").Value() == "1" func shouldPrintDebugInfo() bool { return debugInfo }