Skip to content

Instantly share code, notes, and snippets.

@klauspost
Last active September 30, 2020 22:41
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save klauspost/e407415bee6d1ef6ce3d to your computer and use it in GitHub Desktop.
Save klauspost/e407415bee6d1ef6ce3d to your computer and use it in GitHub Desktop.
package main
// Adapted from: https://gist.github.com/arnehormann/65421048f56ac108f6b5
import (
"encoding/binary"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime"
"runtime/pprof"
"time"
//flstd "compress/flate"
gzstd "compress/gzip"
//flkp "github.com/klauspost/compress/flate"
gzkp "github.com/klauspost/compress/gzip"
pgz "github.com/klauspost/pgzip"
"github.com/youtube/vitess/go/cgzip"
)
// cpuprofile holds the value of the -cpuprofile flag: the path of the file the
// CPU profile is written to. An empty value (the default) disables profiling.
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
// CgzipWriter embeds a *cgzip.Writer and remembers the compression level it
// was built with, so that Reset can construct a replacement writer at the
// same level.
type CgzipWriter struct {
	*cgzip.Writer
	level int
}

// NewCgWriterLevel returns a CgzipWriter that compresses to w at the given
// level. Any error from cgzip.NewWriterLevel is returned unchanged together
// with a nil writer.
func NewCgWriterLevel(w io.Writer, level int) (*CgzipWriter, error) {
	zw, err := cgzip.NewWriterLevel(w, level)
	if err != nil {
		return nil, err
	}
	wrapped := &CgzipWriter{level: level}
	wrapped.Writer = zw
	return wrapped, nil
}

// Reset replaces the embedded writer with a newly created one that targets w
// and uses the level stored at construction time. It panics if the new writer
// cannot be created.
func (c *CgzipWriter) Reset(w io.Writer) {
	replacement, err := cgzip.NewWriterLevel(w, c.level)
	// The embedded writer is overwritten before the error check, so it is
	// replaced even when creation fails.
	c.Writer = replacement
	if err != nil {
		panic(err)
	}
}
// NoOp is a stateless reader and writer that never touches the buffers it is
// given.
type NoOp struct{}

// Read reports v as completely filled without modifying it, so the caller
// sees whatever v already contained. It never returns an error.
func (NoOp) Read(v []byte) (int, error) {
	filled := len(v)
	return filled, nil
}

// Write discards v and reports all of it as written. It never returns an
// error.
func (NoOp) Write(v []byte) (int, error) {
	consumed := len(v)
	return consumed, nil
}
// SeqGen is a reader that produces the repeating byte sequence
// 0, 1, 2, ..., 255, 0, 1, ... The zero value starts at 0.
type SeqGen struct {
	// i is the value of the next byte to emit; Read keeps it in 0..255.
	i int
}

// Read fills v with consecutive byte values, continuing where the previous
// call stopped. It always fills the whole buffer and never returns an error.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++ // wraps from 255 back to 0
	}
	// Remember the position so the next Read continues the sequence instead
	// of restarting from the same byte.
	s.i = int(b)
	return len(v), nil
}
// Rand is a deterministic pseudo-random byte source based on PCG
// (http://www.pcg-random.org/). It is not safe for concurrent use and is not
// cryptographically secure.
type Rand struct {
	state uint64 // current generator state
	inc   uint64 // stream increment; forced odd whenever it is used
}

// pcgmult64 is the multiplier of the 64-bit state update.
const pcgmult64 = 6364136223846793005

// NewRand returns a generator whose initial state and stream increment are
// both derived from seed. Equal seeds yield equal output.
func NewRand(seed uint64) *Rand {
	inc := seed<<1 | 1
	// Seeding sequence: advance once from a zero state, mix in the seed,
	// then advance once more.
	state := uint64(0)
	state = state*pcgmult64 + inc
	state += seed
	state = state*pcgmult64 + inc
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// next advances the generator by one step and returns the 32-bit output
// derived from the previous state (xorshift followed by a rotation).
func (r *Rand) next() uint32 {
	old := r.state
	r.state = old*pcgmult64 + (r.inc | 1)
	xorshifted := uint32(((old >> 18) ^ old) >> 27)
	rot := uint32(old >> 59)
	return (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
}

// Read fills v with pseudo-random bytes, four at a time in little-endian
// order. It always fills the whole buffer and never returns an error.
//
// A buffer whose length is not a multiple of four is supported: the trailing
// 1-3 bytes take the low-order bytes of one extra generator output, and the
// rest of that output is discarded.
func (r *Rand) Read(v []byte) (int, error) {
	w := v
	for ; len(w) >= 4; w = w[4:] {
		binary.LittleEndian.PutUint32(w, r.next())
	}
	if len(w) > 0 {
		// PutUint32 needs four bytes of room, so stage the last word in a
		// scratch array and copy only what still fits.
		var tail [4]byte
		binary.LittleEndian.PutUint32(tail[:], r.next())
		copy(w, tail[:])
	}
	return len(v), nil
}
// wcounter is a pass-through io.Writer that keeps a running total of the
// bytes its destination reported as written.
type wcounter struct {
	n   int       // total bytes accepted by out so far
	out io.Writer // destination that receives every write
}

// Write hands p to the wrapped writer, adds the number of bytes it accepted
// to the running total, and returns the wrapped writer's result unchanged.
func (w *wcounter) Write(p []byte) (n int, err error) {
	accepted, werr := w.out.Write(p)
	w.n = w.n + accepted
	return accepted, werr
}
// Encoder is the set of operations the benchmark needs from a compressing
// writer: accept data (Write), finish a stream (Close), flush pending output
// (Flush) and be pointed at a new destination (Reset).
type Encoder interface {
	io.WriteCloser
	Flush() error
	Reset(w io.Writer)
}
// main is a small benchmark driver. It loads every non-directory file found
// under the -in directory into memory, compresses each of them with the gzip
// implementation selected by -w (repeating the whole set -times times) and,
// when -stats is set, prints one tab-separated statistics row.
//
// With -reset=true a single encoder is created up front and re-targeted with
// Reset before every file; with -reset=false a new encoder is created for
// every file. Every failure is reported by panicking.
func main() {
	rmode := "raw"
	wmode := "gzkp"
	wlevel := -1
	in := ""
	out := "*"
	cpu := 0
	stats := false
	header := true
	reset := true
	times := 1
	// -r is still accepted so existing command lines keep working, but nothing
	// below reads rmode: input always comes from the files under -in.
	flag.StringVar(&rmode, "r", rmode, "read mode (raw|flatekp|flatestd|gzkp|pgzip|cgzip|gzstd|zero|seq|rand)")
	flag.StringVar(&wmode, "w", wmode, "write mode (gzkp|pgzip|gzstd|cgzip)")
	flag.StringVar(&in, "in", in, "input file directory")
	flag.StringVar(&out, "out", out, "output file name, '-' is stdout, default is '*', discard")
	flag.IntVar(&wlevel, "l", wlevel, "compression level (-2|-1|0..9)")
	flag.IntVar(&cpu, "cpu", cpu, "GOMAXPROCS number (0|1...)")
	flag.IntVar(&times, "times", times, "Run the test n times (1...)")
	flag.BoolVar(&stats, "stats", false, "show stats")
	flag.BoolVar(&header, "header", true, "show stats header")
	flag.BoolVar(&reset, "reset", true, "use reset to avoid recreating encoder on every file")
	flag.Parse()
	if flag.NArg() > 0 {
		// Unexpected positional arguments: show the usage, then carry on.
		flag.PrintDefaults()
	}

	if cpu <= 0 {
		cpu = runtime.NumCPU()
	}
	runtime.GOMAXPROCS(cpu)

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			panic(err)
		}
		// Deferred calls run last-in first-out: the profile is stopped
		// before its file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
		defer pprof.StopCPUProfile()
	}

	if wlevel < -2 || 9 < wlevel {
		panic("compression level -l=x must be (-2,-1,0..9)")
	}
	if len(in) == 0 {
		panic("No input set")
	}

	// Read all input up front so file I/O stays out of the timed section.
	allfiles := make([][]byte, 0, 1000)
	walkErr := filepath.Walk(in, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Surface walk failures (for example a missing input directory)
			// instead of silently benchmarking fewer files.
			return err
		}
		if info.IsDir() {
			return nil
		}
		c, err := ioutil.ReadFile(path)
		if err != nil {
			return err
		}
		allfiles = append(allfiles, c)
		return nil
	})
	if walkErr != nil {
		panic(walkErr)
	}

	var w io.Writer
	switch out {
	case "-":
		w = os.Stdout
	case "*":
		w = ioutil.Discard
	default:
		f, err := os.Create(out)
		if err != nil {
			panic(err)
		}
		// Registered before any encoder is created, so it runs after the
		// encoder's deferred Close.
		defer f.Close()
		w = f
	}
	// Everything the encoders emit goes through outSize so the compressed
	// size can be reported.
	outSize := &wcounter{out: w}

	// create builds a new encoder for the selected implementation, writing
	// to outSize at the requested level.
	var create func() (Encoder, error)
	switch wmode {
	case "gzkp":
		create = func() (Encoder, error) {
			e, err := gzkp.NewWriterLevel(outSize, wlevel)
			if err != nil {
				return nil, err
			}
			return e, nil
		}
	case "pgzip":
		create = func() (Encoder, error) {
			e, err := pgz.NewWriterLevel(outSize, wlevel)
			if err != nil {
				return nil, err
			}
			return e, nil
		}
	case "cgzip":
		create = func() (Encoder, error) {
			e, err := NewCgWriterLevel(outSize, wlevel)
			if err != nil {
				return nil, err
			}
			return e, nil
		}
	case "gzstd":
		create = func() (Encoder, error) {
			e, err := gzstd.NewWriterLevel(outSize, wlevel)
			if err != nil {
				return nil, err
			}
			return e, nil
		}
	default:
		panic("write mode -w=x must be (gzkp|pgzip|gzstd|cgzip)")
	}

	// The shared encoder only exists in reset mode; creating one in
	// per-file mode would leave an unused encoder whose Close emits output
	// that belongs to no input file.
	var enc Encoder
	if reset {
		var err error
		if enc, err = create(); err != nil {
			panic(err)
		}
		defer enc.Close()
	}

	// compress writes one file through e and finishes the stream.
	compress := func(e Encoder, b []byte) error {
		n, err := e.Write(b)
		if err != nil {
			return err
		}
		if n != len(b) {
			return io.ErrShortWrite
		}
		return e.Close()
	}

	inSize := 0
	nfiles := 0
	start := time.Now()
	for i := 0; i < times; i++ {
		for _, b := range allfiles {
			nfiles++
			inSize += len(b)
			e := enc
			if reset {
				e.Reset(outSize)
			} else {
				var err error
				if e, err = create(); err != nil {
					panic(err)
				}
			}
			if err := compress(e, b); err != nil {
				panic(err)
			}
		}
	}

	if stats {
		elapsed := time.Since(start)
		if header {
			fmt.Printf("file\tout\treset\tlevel\tcpu\tfiles\tinsize\toutsize\tmillis\tmb/s\n")
		}
		mbpersec := (float64(inSize) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
		fmt.Printf("%s\t%s\t%t\t%d\t%d\t%d\t%d\t%d\t%d\t%.02f\n", in, wmode, reset, wlevel, cpu, nfiles, inSize, outSize.n, elapsed/time.Millisecond, mbpersec)
	}
}
REM go tool pprof -output=cpu.png -png -focus=encFast compressdir.exe cpu.out
REM Arguments: %1 = input directory, %2 = write mode, %3 = one optional extra flag.
REM Builds compressdir, then appends one stats row per level to results.txt.
go build compressdir.go
REM Level -2 runs first and is the only run that prints the header row.
SET LEVEL=-2
compressdir -in=%1 -out=* -stats -header=true -w=%2 -l=%LEVEL% %3 >>results.txt
REM Levels 1 through 9. %%N is used directly because %LEVEL% would be expanded
REM once, when the FOR line is parsed. The body is kept on one line without
REM parentheses so a ")" inside %1 or %3 cannot end it early.
FOR /L %%N IN (1,1,9) DO SET LEVEL=%%N& compressdir -in=%1 -out=* -stats -header=false -w=%2 -l=%%N %3 >>results.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment