Last active
September 30, 2020 22:41
-
-
Save klauspost/e407415bee6d1ef6ce3d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
// Adapted from: https://gist.github.com/arnehormann/65421048f56ac108f6b5
import ( | |
"encoding/binary" | |
"flag" | |
"fmt" | |
"io" | |
"io/ioutil" | |
"os" | |
"path/filepath" | |
"runtime" | |
"runtime/pprof" | |
"time" | |
//flstd "compress/flate" | |
gzstd "compress/gzip" | |
//flkp "github.com/klauspost/compress/flate" | |
gzkp "github.com/klauspost/compress/gzip" | |
pgz "github.com/klauspost/pgzip" | |
"github.com/youtube/vitess/go/cgzip" | |
) | |
// cpuprofile is the value of the -cpuprofile flag: the path of the file a CPU
// profile is written to. The default "" leaves profiling disabled.
var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to file")
type CgzipWriter struct { | |
*cgzip.Writer | |
level int | |
} | |
func NewCgWriterLevel(w io.Writer, level int) (*CgzipWriter, error) { | |
cw, err := cgzip.NewWriterLevel(w, level) | |
if err != nil { | |
return nil, err | |
} | |
return &CgzipWriter{Writer: cw, level: level}, nil | |
} | |
func (c *CgzipWriter) Reset(w io.Writer) { | |
var err error | |
c.Writer, err = cgzip.NewWriterLevel(w, c.level) | |
if err != nil { | |
panic(err) | |
} | |
} | |
// NoOp is a stateless stream endpoint that does no work in either direction.
type NoOp struct{}

// Read reports the whole of v as read without modifying its contents, so the
// caller sees whatever bytes the buffer already held.
func (n NoOp) Read(v []byte) (int, error) {
	return len(v), nil
}

// Write discards v and reports all of it as written.
func (n NoOp) Write(v []byte) (int, error) {
	return len(v), nil
}
// SeqGen is an endless reader producing the byte sequence 0, 1, 2, ..., 255,
// 0, 1, ... . The zero value starts at 0.
type SeqGen struct {
	// i holds the value of the next byte to emit.
	i int
}

// Read fills v with consecutive byte values, continuing where the previous
// call stopped. It always fills the whole buffer and never returns an error.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++
	}
	// Remember the position; without this every call would restart the
	// sequence at the same value. Only the low byte matters, so storing it
	// keeps s.i in 0..255 and rules out integer overflow.
	s.i = int(b)
	return len(v), nil
}
// Rand is a deterministic pseudo-random byte source.
type Rand struct {
	// uses PCG (http://www.pcg-random.org/)
	state uint64
	inc   uint64
}

// pcgmult64 is the multiplier of the 64-bit state update.
const pcgmult64 = 6364136223846793005

// NewRand returns a generator whose stream and starting state are both
// derived from seed. Equal seeds yield identical byte sequences.
func NewRand(seed uint64) *Rand {
	state := uint64(0)
	inc := uint64(seed<<1) | 1
	state = state*pcgmult64 + (inc | 1)
	state += uint64(seed)
	state = state*pcgmult64 + (inc | 1)
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// next advances the generator by one step and returns the 32-bit output
// derived from the state before the step.
func (r *Rand) next() uint32 {
	old := r.state
	// "| 1" keeps the increment odd even for a zero-value Rand.
	r.state = old*pcgmult64 + (r.inc | 1)
	xorshifted := uint32(((old >> 18) ^ old) >> 27)
	rot := uint32(old >> 59)
	return (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
}

// Read fills v with pseudo-random bytes, four at a time in little-endian
// order. It always fills the whole buffer and never returns an error.
//
// A length that is not a multiple of four is handled by generating one more
// word and using only its leading bytes, instead of panicking on the short
// tail.
func (r *Rand) Read(v []byte) (int, error) {
	w := v
	for ; len(w) >= 4; w = w[4:] {
		binary.LittleEndian.PutUint32(w, r.next())
	}
	if len(w) > 0 {
		var tail [4]byte
		binary.LittleEndian.PutUint32(tail[:], r.next())
		copy(w, tail[:])
	}
	return len(v), nil
}
// wcounter is an io.Writer that forwards everything to out and keeps a
// running total of the bytes out reported as written.
type wcounter struct {
	n   int       // total bytes accepted by out so far
	out io.Writer // destination of all writes
}

// Write passes p to the underlying writer and adds the number of bytes it
// accepted to the total. The underlying result is returned unchanged, so a
// short or failed write is still counted for the part that was accepted.
func (w *wcounter) Write(p []byte) (n int, err error) {
	n, err = w.out.Write(p)
	w.n += n
	return n, err
}
// Encoder is the set of methods main needs from a compressing writer. Each
// supported gzip implementation is used through this interface.
type Encoder interface {
	// Reset redirects the encoder to w so it can be reused for a new stream.
	Reset(w io.Writer)
	// Flush is part of the required method set; main itself never calls it.
	Flush() error
	// Close finishes the current stream.
	Close() error
	// Write compresses p into the current stream.
	Write(p []byte) (int, error)
}
func main() { | |
/* defer func() { | |
if p := recover(); p != nil { | |
var msg string | |
switch err := p.(type) { | |
case error: | |
msg = err.Error() | |
case string: | |
msg = err | |
default: | |
os.Stderr.WriteString("unknown type in panic") | |
os.Exit(1) | |
} | |
os.Stderr.WriteString(msg) | |
os.Exit(1) | |
} | |
}() | |
*/ | |
rmode := "raw" | |
wmode := "gzkp" | |
wlevel := -1 | |
in := "" | |
out := "*" | |
cpu := 0 | |
stats := false | |
header := true | |
reset := true | |
times := 1 | |
flag.StringVar(&rmode, "r", rmode, "read mode (raw|flatekp|flatestd|gzkp|pgzip|cgzip|gzstd|zero|seq|rand)") | |
flag.StringVar(&wmode, "w", wmode, "write mode (raw|flatekp|flatestd|gzkp|pgzip|gzstd|cgzip|none)") | |
flag.StringVar(&in, "in", in, "input file directory") | |
flag.StringVar(&out, "out", out, "output file name, default is '-', stdout") | |
flag.IntVar(&wlevel, "l", wlevel, "compression level (-1|0..9)") | |
flag.IntVar(&cpu, "cpu", cpu, "GOMAXPROCS number (0|1...)") | |
flag.IntVar(×, "times", times, "Run the test n times (1...)") | |
flag.BoolVar(&stats, "stats", false, "show stats") | |
flag.BoolVar(&header, "header", true, "show stats header") | |
flag.BoolVar(&reset, "reset", true, "use reset to avoid recreating encoder on every file") | |
flag.Parse() | |
if flag.NArg() > 0 { | |
flag.PrintDefaults() | |
} | |
if cpu <= 0 { | |
cpu = runtime.NumCPU() | |
} | |
runtime.GOMAXPROCS(cpu) | |
if *cpuprofile != "" { | |
f, err := os.Create(*cpuprofile) | |
if err != nil { | |
panic(err) | |
} | |
pprof.StartCPUProfile(f) | |
defer pprof.StopCPUProfile() | |
} | |
if wlevel < -2 || 9 < wlevel { | |
panic("compression level -l=x must be (-2,-1,,0..9)") | |
} | |
var err error | |
if len(in) == 0 { | |
panic("No input set") | |
} | |
allfiles := make([][]byte, 0, 1000) | |
filepath.Walk(in, func(path string, info os.FileInfo, err error) error { | |
if info != nil && !info.IsDir() { | |
c, err := ioutil.ReadFile(path) | |
if err != nil { | |
panic(err) | |
} | |
allfiles = append(allfiles, c) | |
} | |
return nil | |
}) | |
/* var source bool | |
switch rmode { | |
case "zero": | |
// NoOp writes what the original buffer contained unchanged. | |
// As that buffer is initialized with 0 and not changed, | |
// NoOp is usable as a very fast zero-reader. | |
r = NoOp{} | |
source = true | |
case "seq": | |
r = &SeqGen{} | |
source = true | |
case "rand": | |
r = NewRand(0xdeadbeef) | |
source = true | |
case "raw": | |
case "gzkp": | |
var gzr *gzkp.Reader | |
if gzr, err = gzkp.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "pgzip": | |
var gzr *pgz.Reader | |
if gzr, err = pgz.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "cgzip": | |
var gzr io.ReadCloser | |
if gzr, err = cgzip.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "gzstd": | |
var gzr *gzstd.Reader | |
if gzr, err = gzstd.NewReader(r); err == nil { | |
defer gzr.Close() | |
r = gzr | |
} | |
case "flatekp": | |
fr := flkp.NewReader(r) | |
defer fr.Close() | |
r = fr | |
case "flatestd": | |
fr := flstd.NewReader(r) | |
defer fr.Close() | |
r = fr | |
default: | |
panic("read mode -r=x must be (raw|flatekp|flatestd|gzkp|gzstd|zero|seq|rand)") | |
} | |
if err != nil { | |
panic(err) | |
} | |
*/ | |
var w io.Writer | |
if out == "-" { | |
w = os.Stdout | |
} else if out == "*" { | |
w = ioutil.Discard | |
out = "discard" | |
} else { | |
w, err = os.Create(out) | |
if err != nil { | |
panic(err) | |
} | |
} | |
outSize := &wcounter{out: w} | |
w = outSize | |
var enc Encoder | |
var create func() Encoder | |
switch wmode { | |
case "gzkp": | |
var gzw *gzkp.Writer | |
if gzw, err = gzkp.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
enc = gzw | |
} | |
create = func() Encoder { | |
e, _ := gzkp.NewWriterLevel(w, wlevel) | |
return e | |
} | |
case "pgzip": | |
var gzw *pgz.Writer | |
if gzw, err = pgz.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
enc = gzw | |
} | |
create = func() Encoder { | |
e, _ := pgz.NewWriterLevel(w, wlevel) | |
return e | |
} | |
case "cgzip": | |
var gzw *CgzipWriter | |
if gzw, err = NewCgWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
enc = gzw | |
} | |
create = func() Encoder { | |
e, _ := NewCgWriterLevel(w, wlevel) | |
return e | |
} | |
case "gzstd": | |
var gzw *gzstd.Writer | |
if gzw, err = gzstd.NewWriterLevel(w, wlevel); err == nil { | |
defer gzw.Close() | |
enc = gzw | |
} | |
create = func() Encoder { | |
e, _ := gzstd.NewWriterLevel(w, wlevel) | |
return e | |
} | |
default: | |
panic("write mode -w=x must be (raw|flatekp|flatestd|gzkp|pgzip|gzstd|none)") | |
} | |
if err != nil { | |
panic(err) | |
} | |
inSize := 0 | |
start := time.Now() | |
encReset := func(b []byte, e Encoder, out io.Writer) error { | |
e.Reset(out) | |
n, err := e.Write(b) | |
if err != nil { | |
return err | |
} | |
if n != len(b) { | |
return io.ErrShortWrite | |
} | |
err = e.Close() | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
encCreate := func(b []byte, f func() Encoder) error { | |
e := f() | |
n, err := e.Write(b) | |
if err != nil { | |
return err | |
} | |
if n != len(b) { | |
return io.ErrShortWrite | |
} | |
err = e.Close() | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
_ = enc | |
_ = encReset | |
nfiles := 0 | |
for i := 0; i < times; i++ { | |
for _, b := range allfiles { | |
nfiles++ | |
inSize += len(b) | |
var err error | |
if reset { | |
err = encReset(b, enc, outSize) | |
} else { | |
err = encCreate(b, create) | |
} | |
if err != nil { | |
panic(err) | |
} | |
} | |
} | |
if stats { | |
elapsed := time.Since(start) | |
if header { | |
fmt.Printf("file\tout\treset\tlevel\tcpu\tfiles\tinsize\toutsize\tmillis\tmb/s\n") | |
} | |
mbpersec := (float64(inSize) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second))) | |
fmt.Printf("%s\t%s\t%t\t%d\t%d\t%d\t%d\t%d\t%d\t%.02f\n", in, wmode, reset, wlevel, cpu, nfiles, inSize, outSize.n, elapsed/time.Millisecond, mbpersec) | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
REM Benchmark driver: %1 = input directory, %2 = write mode (-w), %3 = optional extra flag.
REM Appends one stats line per compression level to results.txt.
REM go tool pprof -output=cpu.png -png -focus=encFast compressdir.exe cpu.out
go build compressdir.go
REM Do not benchmark a stale or missing binary when the build fails.
IF ERRORLEVEL 1 EXIT /B 1
REM The first run (level -2) also emits the stats header line.
compressdir -in=%1 -out=* -stats -header=true -w=%2 -l=-2 %3 >>results.txt
REM Levels 1 through 9, without the header.
FOR /L %%L IN (1,1,9) DO compressdir -in=%1 -out=* -stats -header=false -w=%2 -l=%%L %3 >>results.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment