Skip to content

Instantly share code, notes, and snippets.

@klauspost
Last active March 28, 2016 13:13
Show Gist options
  • Save klauspost/00f7c9a19e56581f5ead to your computer and use it in GitHub Desktop.
Save klauspost/00f7c9a19e56581f5ead to your computer and use it in GitHub Desktop.
package main
// Adapted from: https://gist.github.com/arnehormann/65421048f56ac108f6b5
import (
"bufio"
"encoding/binary"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"runtime"
"time"
flstd "compress/flate"
gzstd "compress/gzip"
"github.com/biogo/hts/bgzf"
flkp "github.com/klauspost/compress/flate"
gzkp "github.com/klauspost/compress/gzip"
pgz "github.com/klauspost/pgzip"
"golang.org/x/build/pargzip"
"github.com/golang/snappy"
"github.com/klauspost/dedup"
"github.com/klauspost/readahead"
//"github.com/rasky/go-lzo"
"github.com/youtube/vitess/go/cgzip"
)
// NoOp is a stateless reader/writer that performs no work: it neither
// fills nor inspects the buffers handed to it.
type NoOp struct{}

// Read claims to have filled all of p without modifying it, so the caller
// sees whatever the buffer already contained. It never returns an error.
func (NoOp) Read(p []byte) (int, error) {
	size := len(p)
	return size, nil
}

// Write claims to have consumed all of p and discards it. It never returns
// an error.
func (NoOp) Write(p []byte) (int, error) {
	size := len(p)
	return size, nil
}
// SeqGen is an endless reader producing the repeating byte sequence
// 0, 1, 2, ..., 255, 0, 1, ...
type SeqGen struct {
	// i is the value of the next byte to emit (only its low 8 bits matter).
	i int
}

// Read fills v with consecutive byte values, continuing where the previous
// call stopped. It always fills the whole buffer and never returns an error.
//
// The position is stored back into s.i after every call; previously it was
// never updated, so each Read restarted the sequence at 0 regardless of how
// many bytes had already been produced.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++
	}
	// Keep the counter within 0..255 so it can never overflow.
	s.i = int(b)
	return len(v), nil
}
// Rand is an endless reader of deterministic pseudo-random bytes.
type Rand struct {
	// uses PCG (http://www.pcg-random.org/)
	state uint64
	inc   uint64
}

// pcgmult64 is the multiplier used to advance the generator state.
const pcgmult64 = 6364136223846793005

// NewRand returns a generator whose output is fully determined by seed.
func NewRand(seed uint64) *Rand {
	// The increment must be odd; forcing the low bit guarantees that.
	inc := seed<<1 | 1
	// Starting from state 0: one step, add the seed, then one more step.
	state := inc + seed
	state = state*pcgmult64 + inc
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// next advances the generator by one step and returns 32 output bits.
func (r *Rand) next() uint32 {
	old := r.state
	// "| 1" keeps the increment odd even if Rand was built by hand.
	r.state = old*pcgmult64 + (r.inc | 1)
	xorshifted := uint32(((old >> 18) ^ old) >> 27)
	rot := uint32(old >> 59)
	// Rotate xorshifted right by rot bits.
	return (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
}

// Read fills v with pseudo-random bytes, four at a time in little-endian
// order. It always fills the whole buffer and never returns an error.
//
// Buffers whose length is not a multiple of four are supported: the
// trailing 1-3 bytes take the leading bytes of one more generated word.
// (The earlier implementation panicked on such lengths.) Output for
// lengths that are a multiple of four is unchanged.
func (r *Rand) Read(v []byte) (int, error) {
	w := v
	for len(w) >= 4 {
		binary.LittleEndian.PutUint32(w, r.next())
		w = w[4:]
	}
	if len(w) > 0 {
		var tail [4]byte
		binary.LittleEndian.PutUint32(tail[:], r.next())
		copy(w, tail[:])
	}
	return len(v), nil
}
// wcounter forwards writes to an underlying writer while keeping a running
// total of the bytes that writer accepted.
type wcounter struct {
	n   int       // bytes accepted by out so far
	out io.Writer // destination of every write
}

// Write passes p to the wrapped writer, adds the number of bytes it
// reports as written to the running total, and returns the wrapped
// writer's result unchanged.
func (w *wcounter) Write(p []byte) (n int, err error) {
	written, werr := w.out.Write(p)
	w.n = w.n + written
	return written, werr
}
// main copies data from a source (stdin, a file, or a synthetic generator)
// to a destination (stdout, a file, or a discard sink), optionally
// decompressing on the way in (-r) and compressing on the way out (-w).
// With -stats it prints one tab-separated line of timing and size figures.
//
// Changes from the earlier version:
//   - -in and -out now default to "-" as their help text says; they used to
//     default to the value of the read mode ("raw").
//   - The buffered writer wrapped around an output file is flushed and the
//     file is closed after the copy, so the tail of the output is not lost.
//   - An error from the read-ahead wrapper is no longer ignored.
//   - Help and panic messages list the modes and levels actually accepted.
func main() {
	/*	defer func() {
			if p := recover(); p != nil {
				var msg string
				switch err := p.(type) {
				case error:
					msg = err.Error()
				case string:
					msg = err
				default:
					os.Stderr.WriteString("unknown type in panic")
					os.Exit(1)
				}
				os.Stderr.WriteString(msg)
				os.Exit(1)
			}
		}()
	*/
	rmode := "raw"
	wmode := "gzkp"
	wlevel := -1
	in := "-"
	out := "-"
	cpu := 0
	stats := false
	header := true
	flag.StringVar(&rmode, "r", rmode, "read mode (raw|flatekp|flatestd|gzkp|bgzf|pgzip|cgzip|gzstd|zero|seq|rand)")
	flag.StringVar(&wmode, "w", wmode, "write mode (raw|flatekp|flatestd|gzkp|bgzf|pgzip|pargzip|gzstd|cgzip|dedup|snappy|none)")
	flag.StringVar(&in, "in", in, "input file name, default is '-', stdin")
	flag.StringVar(&out, "out", out, "output file name, default is '-', stdout; '*' discards the output")
	flag.IntVar(&wlevel, "l", wlevel, "compression level (-2|-1|0..9)")
	flag.IntVar(&cpu, "cpu", cpu, "GOMAXPROCS number (0|1...)")
	flag.BoolVar(&stats, "stats", false, "show stats")
	flag.BoolVar(&header, "header", true, "show stats header")
	flag.Parse()
	// Unexpected positional arguments only trigger the usage listing;
	// execution continues afterwards.
	if flag.NArg() > 0 {
		flag.PrintDefaults()
	}
	if cpu <= 0 {
		cpu = runtime.NumCPU()
	}
	runtime.GOMAXPROCS(cpu)
	if wlevel < -2 || 9 < wlevel {
		panic("compression level -l=x must be (-2|-1|0..9)")
	}

	// Open the raw input. It is opened even when a synthetic read mode
	// replaces it below.
	var err error
	var r io.Reader
	if in == "-" {
		r = os.Stdin
	} else {
		r, err = os.Open(in)
		if err != nil {
			panic(err)
		}
		// Wrap file input in a read-ahead reader (10 buffers of 10 MiB).
		r, err = readahead.NewReaderSize(r, 10, 10<<20)
		if err != nil {
			panic(err)
		}
	}

	// Select the read mode. source marks generators that ignore the input.
	var source bool
	switch rmode {
	case "zero":
		// NoOp writes what the original buffer contained unchanged.
		// As that buffer is initialized with 0 and not changed,
		// NoOp is usable as a very fast zero-reader.
		r = NoOp{}
		source = true
	case "seq":
		r = &SeqGen{}
		source = true
	case "rand":
		r = NewRand(0xdeadbeef)
		source = true
	case "raw":
	case "gzkp":
		var gzr *gzkp.Reader
		if gzr, err = gzkp.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "bgzf":
		var gzr *bgzf.Reader
		if gzr, err = bgzf.NewReader(r, cpu); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "pgzip":
		var gzr *pgz.Reader
		if gzr, err = pgz.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "cgzip":
		var gzr io.ReadCloser
		if gzr, err = cgzip.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "gzstd":
		var gzr *gzstd.Reader
		if gzr, err = gzstd.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "flatekp":
		fr := flkp.NewReader(r)
		defer fr.Close()
		r = fr
	case "flatestd":
		fr := flstd.NewReader(r)
		defer fr.Close()
		r = fr
	default:
		panic("read mode -r=x must be (raw|flatekp|flatestd|gzkp|bgzf|pgzip|cgzip|gzstd|zero|seq|rand)")
	}
	if err != nil {
		panic(err)
	}

	// Open the raw output. finishOut flushes and closes it once all data,
	// including any compressor trailer, has been written; it is a no-op
	// for stdout and for the discard sink.
	var w io.Writer
	finishOut := func() error { return nil }
	if out == "-" {
		w = os.Stdout
	} else if out == "*" {
		w = ioutil.Discard
		out = "discard"
	} else {
		f, err := os.Create(out)
		if err != nil {
			panic(err)
		}
		bw := bufio.NewWriter(f)
		w = bw
		finishOut = func() error {
			if err := bw.Flush(); err != nil {
				f.Close()
				return err
			}
			return f.Close()
		}
	}
	// Count the bytes that reach the raw output (i.e. after compression).
	outSize := &wcounter{out: w}
	w = outSize

	// Select the write mode. sink marks modes that discard everything.
	var sink bool
	switch wmode {
	case "none":
		w = NoOp{}
		sink = true
	case "raw":
	case "gzkp":
		var gzw *gzkp.Writer
		if gzw, err = gzkp.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "pgzip":
		var gzw *pgz.Writer
		if gzw, err = pgz.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "bgzf":
		var gzw *bgzf.Writer
		if gzw, err = bgzf.NewWriterLevel(w, wlevel, cpu); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "pargzip":
		// This mode does not take the -l level.
		var gzw *pargzip.Writer
		gzw = pargzip.NewWriter(w)
		gzw.UseSystemGzip = false
		defer gzw.Close()
		w = gzw
	case "cgzip":
		var gzw *cgzip.Writer
		if gzw, err = cgzip.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "gzstd":
		var gzw *gzstd.Writer
		if gzw, err = gzstd.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "dedup":
		var ddw dedup.Writer
		if ddw, err = dedup.NewStreamWriter(w, dedup.ModeDynamic, 1024, 0); err == nil {
			defer ddw.Close()
			w = ddw
		}
	case "snappy":
		sw := snappy.NewWriter(w)
		w = sw
		/*	case "lzo1x":
			sw := lzo.NewWriter(w, wlevel)
			w = sw*/
	case "flatekp":
		var fw *flkp.Writer
		if fw, err = flkp.NewWriter(w, wlevel); err == nil {
			defer fw.Close()
			w = fw
		}
	case "flatestd":
		var fw *flstd.Writer
		if fw, err = flstd.NewWriter(w, wlevel); err == nil {
			defer fw.Close()
			w = fw
		}
	default:
		panic("write mode -w=x must be (raw|flatekp|flatestd|gzkp|bgzf|pgzip|pargzip|gzstd|cgzip|dedup|snappy|none)")
	}
	if err != nil {
		panic(err)
	}

	// A generator feeding a discard sink would do no useful work.
	if source && sink {
		return
	}

	inSize := int64(0)
	start := time.Now()
	func() {
		// Close the reader and writer when the copy is done so that
		// compressors emit their trailers before time is measured.
		for _, mc := range []interface{}{r, w} {
			if c, ok := mc.(io.Closer); ok {
				defer c.Close()
			}
		}
		nr, err := io.Copy(w, r)
		inSize += nr
		if err != nil && err != io.EOF {
			panic(err)
		}
	}()
	// Push buffered file output to disk; this runs after the compressor
	// was closed above and is included in the measured time.
	if err := finishOut(); err != nil {
		panic(err)
	}
	if stats {
		elapsed := time.Since(start)
		if header {
			fmt.Printf("file\tin\tout\tlevel\tcpu\tinsize\toutsize\tmillis\tmb/s\n")
		}
		mbpersec := (float64(inSize) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
		fmt.Printf("%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%.02f\n", in, rmode, wmode, wlevel, cpu, inSize, outSize.n, elapsed/time.Millisecond, mbpersec)
	}
}
REM Benchmark driver: runs the "compress" program on the file given as %1
REM with several write modes at each compression level.
REM   -out=*   discards the compressed output
REM   -stats   prints one tab-separated result line per run
REM Result lines are appended to results.txt; "echo.>>results.txt" appends
REM an empty line to separate the groups of runs.
REM NOTE(review): the first command below does not redirect to results.txt;
REM it may be a warm-up run or a separate one-line script - confirm.
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=-2
REM Level 1. Only this first recorded run prints the stats header.
SET LEVEL=1
compress -in=%1 -out=* -stats -header=true -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
REM Levels 2 through 9 repeat the same five write modes.
SET LEVEL=2
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=3
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=4
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=5
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=6
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=7
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=8
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=9
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
REM Level -2 is run for gzkp and pgzip only.
SET LEVEL=-2
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
REM pargzip is run once: the program's pargzip mode does not use the level,
REM so -l=0 only affects the level column of the stats line.
compress -in=%1 -out=* -stats -header=false -w="pargzip" -l=0 >>results.txt
echo.>>results.txt
REM Benchmark driver for a single write mode: rebuilds the "compress"
REM program, then runs it on the file given as %1 using the write mode
REM given as %2, once at level -2 and once at each of the levels 1 to 9.
REM   -out=*   discards the compressed output
REM   -stats   prints one tab-separated result line per run
REM Result lines are appended to results.txt; only the first run prints
REM the stats header.
go build compress.go
SET LEVEL=-2
compress -in=%1 -out=* -stats -header=true -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=1
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=2
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=3
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=4
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=5
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=6
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=7
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=8
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=9
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment