Skip to content

Instantly share code, notes, and snippets.

@ksurent
Created November 24, 2019 19:43
Show Gist options
  • Save ksurent/ce0146391e46b48794ff8c5f8c562f55 to your computer and use it in GitHub Desktop.
Save ksurent/ce0146391e46b48794ff8c5f8c562f55 to your computer and use it in GitHub Desktop.
A toy grammar–based fuzzer for bookingcom/carbonapi. Heavily inspired by The Fuzzing Book.
package main
import (
"flag"
"fmt"
"math"
"math/rand"
"os"
"runtime/debug"
"strings"
"time"
"github.com/bookingcom/carbonapi/expr"
"github.com/bookingcom/carbonapi/expr/functions"
"github.com/bookingcom/carbonapi/pkg/parser"
"golang.org/x/exp/ebnf"
)
var (
seed = flag.Int64("seed", time.Now().UnixNano(), "Random seed.")
dryrun = flag.Bool("dryrun", false, "Generate inputs but don't feed them into the parser.")
count = flag.Int("count", 100, "Number of inputs to generate in dry run (0 = infinity).")
maxtime = flag.Duration("maxtime", 0, "Maximum run time (0 = indefinite).")
)
func main() {
flag.Parse()
rand.Seed(*seed)
g, err := ebnf.Parse("", strings.NewReader(grmmr))
if err != nil {
errout(err)
}
if err := ebnf.Verify(g, "Top"); err != nil {
errout(err)
}
computeAlts(g, altps)
total, crashers := 0, 0
bytes := 0.0
t0 := time.Now()
functions.New(nil)
if *maxtime > 0 {
go func() {
time.Sleep(*maxtime)
fmt.Fprintln(os.Stderr, "Maximum run time reached, exiting.")
os.Exit(0)
}()
}
go func() {
for {
// Yes, this is race-y. It's fine.
dt := time.Since(t0)
rate := bytes / 1024 / 10 / dt.Seconds()
avglen := bytes / float64(total)
fmt.Fprintf(os.Stderr, "Runtime: %s; Inputs: %d (%.2f KiB/s; Avglen: %.2f B); Crashers: %d\n", dt.Truncate(time.Second), total, rate, avglen, crashers)
time.Sleep(10 * time.Second)
}
}()
for {
input := generate(g, "Top", altps)
total++
bytes += float64(len(input))
if *dryrun {
fmt.Fprintf(os.Stdout, "[%s]\n", input)
if *count > 0 && total >= *count {
break
}
} else {
if crashed, panic, stack := check(input); crashed {
save(input, panic, stack)
crashers++
}
}
}
}
func check(s string) (crashed bool, panic, stack string) {
defer func() {
if r := recover(); r != nil {
crashed = true
panic = fmt.Sprint(r)
stack = string(debug.Stack())
// Remove 'goroutine 1 [running]' and debug.Stack itself.
stack = stack[strings.IndexByte(stack, '\n')+1:]
stack = stack[strings.IndexByte(stack, '\n')+1:]
stack = stack[strings.IndexByte(stack, '\n')+1:]
return
}
}()
if e, _, err := parser.ParseExpr(s); err == nil {
if _, err = expr.EvalExpr(e, 0, 1, nil); err == nil {
return
}
}
return
}
func save(crasher, panic, stack string) {
fh, err := os.OpenFile("/tmp/crashers", os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0644)
if err != nil {
fmt.Fprintln(os.Stderr, "[", crasher, "]")
return
}
defer fh.Close()
fmt.Fprintf(fh, "found crasher:\n---\n[%s]\n---\n", crasher)
fmt.Fprintf(fh, "error:\n---\n%s\n%s\n---\n", panic, stack)
}
func generate(g ebnf.Grammar, start string, altps map[string][]float64) string {
// NOTE(asurikov): track stack depth?
var visit func(ebnf.Expression, []float64) string
visit = func(e ebnf.Expression, ps []float64) string {
switch ee := e.(type) {
case ebnf.Sequence:
var b strings.Builder
for i := range ee {
b.WriteString(visit(ee[i], ps))
}
return b.String()
case ebnf.Alternative:
return visit(ee[roll(ps)], ps)
case *ebnf.Group:
return visit(ee.Body, ps)
case *ebnf.Option:
// NOTE(asurikov): favour non-empty for shorter outputs?
if rand.Intn(100) >= 50 {
return visit(ee.Body, ps)
}
return ""
case *ebnf.Production:
return visit(ee.Expr, altps[ee.Name.String])
case *ebnf.Range:
begin, end := int(ee.Begin.String[0]), int(ee.End.String[0])
return string(begin + rand.Intn(end-begin))
case *ebnf.Repetition:
var b strings.Builder
for {
if rand.Float64() > repeatp {
break
}
b.WriteString(visit(ee.Body, ps))
}
return b.String()
case *ebnf.Name:
return visit(g[ee.String], ps)
case *ebnf.Token:
return ee.String
}
panic(fmt.Sprintf("%T: %v", e, e))
}
return visit(g[start], nil)
}
func errout(err error) {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
const grmmr = `
Top = Expression .
Expression = Metric | Call .
Metric = Node { NodeSep Node } .
Node = Word | Glob .
Word = Letter { Letter | Digit | Punct } .
Call = Function "(" ArgList ")" .
ArgList = [ Argument { ArgSep Argument } ] .
Argument = Literal | Metric | KeyValue | Call .
KeyValue = Word "=" Literal .
Literal = Bool | Number | QWord .
Number = RandomNum | InterestingNum .
RandomNum = [ Sign ] ( Integer | Float ).
Integer = LeadDigit { Digit } .
Float = Integer "." Integer .
QWord = "\"" Word "\"" .
Bool = "true" | "True" | "TRUE" | "false" | "False" | "FALSE" .
Sign = "-" .
LeadDigit = "1" … "9" .
Digit = "0" … "9" .
Letter = "a" … "z" | "A" … "Z" .
Glob = "*" .
NodeSep = "." .
Punct = "_" | "-" .
ArgSep = "," .
InterestingNum = "-1" | "0" | "1" | "2147483647" | "-2147483648" | "-9223372036854775808" | "9223372036854775807" .
Function = "absolute" | "alias" | "aliasByMetric" | "aliasByNode" | "aliasSub" | "asPercent" | "averageSeries" | "averageSeriesWithWildcards" | "below" | "cactiStyle" | "cairo" | "changed" | "consolidateBy" | "constantLine" | "countSeries" | "cumulative" | "delay" | "derivative" | "diffSeries" | "divideSeries" | "ewma" | "exclude" | "fallbackSeries" | "fft" | "grep" | "group" | "groupByNode" | "highest" | "hitcount" | "holtWintersAberration" | "holtWintersConfidenceBands" | "holtWintersForecast" | "ifft" | "integral" | "invert" | "isNotNull" | "keepLastValue" | "kolmogorovSmirnovTest2" | "legendValue" | "limit" | "linearRegression" | "logarithm" | "lowPass" | "lowest" | "mapSeries" | "minMax" | "mostDeviant" | "moving" | "movingMedian" | "multiplySeries" | "multiplySeriesWithWildcards" | "nPercentile" | "nonNegativeDerivative" | "offset" | "offsetToZero" | "pearson" | "pearsonClosest" | "perSecond" | "percentileOfSeries" | "polyfit" | "pow" | "randomWalk" | "rangeOfSeries" | "reduce" | "removeBelowSeries" | "removeEmptySeries" | "scale" | "scaleToSeconds" | "seriesList" | "sortBy" | "sortByName" | "squareRoot" | "stddevSeries" | "stdev" | "substr" | "sum" | "sumSeriesWithWildcards" | "summarize" | "timeFunction" | "timeShift" | "timeStack" | "transformNull" | "tukey" .
`
// Probability of every individual repetition execution to produce non–empty
// result. Higher probablity means more repetitions. At 0.9 I'm consistently
// getting stack overflows.
const repeatp = 0.85
// Maps probalitities to production rules. Make "holes" with math.NaN.
// Unmentioned rules will be split evenly between "remainder" probability.
var altps = map[string][]float64{
"Expression": {0.01, 0.99}, // Metric is 1%, Expression is 99%.
"Literal": {0, 1, 0}, // Bool is 0%, Number is 100%, QWord is 0%.
"Number": {math.NaN(), 0.99}, // RandomNum is 1%, InterestingNum is 99%.
}
func computeAlts(g ebnf.Grammar, altps map[string][]float64) {
// TODO(asurikov): support ranges.
// TODO(asurikov): support options.
for _, e := range g {
var l int
if alts, ok := e.Expr.(ebnf.Alternative); ok {
l = len(alts)
} else {
continue
}
if _, ok := altps[e.Name.String]; !ok {
altps[e.Name.String] = makeNaNs(l)
}
complete(altps[e.Name.String])
}
}
func complete(ps []float64) {
var unknown, psum float64
for _, p := range ps {
if math.IsNaN(p) {
unknown++
} else {
psum += p
}
}
defp := (1 - psum) / unknown
psum = 0
for i, p := range ps {
if math.IsNaN(p) {
ps[i] = defp
}
psum += ps[i]
}
const epsilon = 0.00001
if ok := math.Abs(1-psum) < epsilon; !ok {
panic("must add to 1")
}
}
func makeNaNs(n int) []float64 {
nans := make([]float64, n)
for i := 0; i < n; i++ {
nans[i] = math.NaN()
}
return nans
}
func roll(ps []float64) int {
var (
i int
sum float64
r = rand.Float64()
)
for _, p := range ps {
sum += p
if r < sum {
break
}
i++
}
return i
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment