Skip to content

Instantly share code, notes, and snippets.

@rybit
Created March 18, 2020 00:17
Show Gist options
  • Save rybit/78787472a41f3f85dc0f31d3d3b368be to your computer and use it in GitHub Desktop.
Save rybit/78787472a41f3f85dc0f31d3d3b368be to your computer and use it in GitHub Desktop.
A script that reads CSV data and computes percentiles.
package main
import (
"bufio"
"fmt"
"log"
"math"
"os"
"sort"
"strconv"
"strings"
)
// main reads the CSV file named by the first command-line argument and logs
// a percentile report.
//
// Every line is split into at most three comma-separated fields. The first
// field is ignored. The second is parsed as a float64 (the variable is called
// msec, so presumably a duration in milliseconds) and the third as an integer
// file count; double quotes are stripped from both before parsing. Lines that
// do not have three fields or fail to parse are counted per failure kind and
// skipped.
//
// Each sample is filed under a size bucket chosen from its file count and
// also under the catch-all "files" bucket. The report prints one row per
// bucket with the configured percentiles and the sample count, followed by
// the error counters.
func main() {
	// Guard the argument access so a missing path gives a usage message
	// instead of an index-out-of-range panic.
	if len(os.Args) < 2 {
		log.Fatal("usage: pass the path of the CSV file as the first argument")
	}
	fname := os.Args[1]

	f, err := os.Open(fname)
	if err != nil {
		log.Fatalf("opening %s: %v", fname, err)
	}
	defer f.Close()

	// Printf, not Print: Print adds no space between two string operands, so
	// the message and the filename would run together.
	log.Printf("Starting to read %s", fname)

	errs := make(map[string]int)
	counts := make(map[string]int)
	percentiles := []float64{0.50, 0.75, 0.90, 0.99}

	// Report order. "files" is the aggregate bucket that receives every sample.
	bucketNames := []string{"tiny", "small", "medium", "large", "huge", "files"}
	buckets := make(map[string][]float64, len(bucketNames))

	scan := bufio.NewScanner(f)
	for scan.Scan() {
		parts := strings.SplitN(scan.Text(), ",", 3)
		if len(parts) != 3 {
			errs["too_short"]++
			continue
		}

		msec, err := strconv.ParseFloat(strings.ReplaceAll(parts[1], `"`, ""), 64)
		if err != nil {
			errs["bad_msec"]++
			continue
		}

		files, err := strconv.Atoi(strings.ReplaceAll(parts[2], `"`, ""))
		if err != nil {
			errs["bad_file_count"]++
			continue
		}

		var bucketName string
		switch {
		case files < 100:
			bucketName = "tiny"
		case files < 500:
			bucketName = "small"
		case files < 5000:
			bucketName = "medium"
		case files < 10000:
			bucketName = "large"
		default:
			bucketName = "huge"
		}

		buckets[bucketName] = append(buckets[bucketName], msec)
		counts[bucketName]++
		buckets["files"] = append(buckets["files"], msec)
		counts["files"]++
	}
	// Scan returns false on read errors and over-long lines as well as on
	// EOF; without this check the report would be silently incomplete.
	if err := scan.Err(); err != nil {
		log.Printf("stopped reading %s early, the report only covers the lines read so far: %v", fname, err)
	}
	log.Println("Finished reading lines")

	log.Println("-------------- REPORT -----------")
	var header strings.Builder
	header.WriteString("percentiles:")
	for _, p := range percentiles {
		fmt.Fprintf(&header, "\t%.02f", p)
	}
	header.WriteString("\tcount")
	log.Println(header.String())

	for _, name := range bucketNames {
		printRes(name, percentiles, buckets, counts)
	}
	log.Printf("errors %+v", errs)
}
// printRes logs a single report row for the bucket called name: a "# name:"
// label, one tab-separated value per requested percentile computed over
// data[name], and finally counts[name]. A bucket with no samples reports 0
// for every percentile.
func printRes(name string, percentiles []float64, data map[string][]float64, counts map[string]int) {
	samples := data[name]

	var row strings.Builder
	fmt.Fprintf(&row, "# %s:", name)
	for _, q := range percentiles {
		value := 0.0
		if len(samples) != 0 {
			value = SamplePercentile(float64Slice(samples), q)
		}
		fmt.Fprintf(&row, "\t%.04f", value)
	}
	fmt.Fprintf(&row, "\t%d", counts[name])

	log.Println(row.String())
}
// float64Slice adapts a []float64 to sort.Interface with ascending order.
//
// Taken from https://play.golang.org/p/3LOGgbEjcW
type float64Slice []float64

// Len returns the number of elements.
func (p float64Slice) Len() int { return len(p) }

// Less reports whether element i is smaller than element j.
func (p float64Slice) Less(i, j int) bool { return p[i] < p[j] }

// Swap exchanges elements i and j.
func (p float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// SamplePercentile returns the percentile of values selected by perc, where
// perc is a fraction (0.5 is the median).
//
// The rank is computed as perc*(len(values)+1). Ranks below 1 yield the
// smallest value, ranks at or above len(values) yield the largest, and
// anything in between is linearly interpolated between the two neighbouring
// sorted values.
//
// An empty values slice yields 0. A NaN perc yields NaN: it matches neither
// range guard, and converting it to an index would go out of range.
//
// values is sorted in place, so the caller's backing array is reordered.
func SamplePercentile(values float64Slice, perc float64) float64 {
	size := len(values)
	if size == 0 {
		return 0
	}
	if math.IsNaN(perc) {
		return math.NaN()
	}

	sort.Sort(values)

	pos := perc * float64(size+1) // alternatively, drop the +1
	switch {
	case pos < 1.0:
		return values[0]
	case pos >= float64(size):
		return values[size-1]
	}

	// Here 1 <= pos < size, so both idx-1 and idx are valid indexes.
	idx := int(pos)
	lower, upper := values[idx-1], values[idx]
	return lower + (pos-math.Floor(pos))*(upper-lower)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment