Skip to content

Instantly share code, notes, and snippets.

@klauspost klauspost/compress.go
Last active Mar 28, 2016

Embed
What would you like to do?
package main
// Adapted from : https://gist.github.com/arnehormann/65421048f56ac108f6b5
import (
"bufio"
"encoding/binary"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"runtime"
"time"
flstd "compress/flate"
gzstd "compress/gzip"
"github.com/biogo/hts/bgzf"
flkp "github.com/klauspost/compress/flate"
gzkp "github.com/klauspost/compress/gzip"
pgz "github.com/klauspost/pgzip"
"golang.org/x/build/pargzip"
"github.com/golang/snappy"
"github.com/klauspost/dedup"
"github.com/klauspost/readahead"
//"github.com/rasky/go-lzo"
"github.com/youtube/vitess/go/cgzip"
)
// NoOp is a stateless stream endpoint that can stand in for both an
// io.Reader and an io.Writer without doing any work.
type NoOp struct{}

// Read reports p as completely filled without touching its contents.
// It never returns an error and never signals end of stream.
func (NoOp) Read(p []byte) (int, error) {
	filled := len(p)
	return filled, nil
}

// Write reports p as completely written and drops the data.
// It never returns an error.
func (NoOp) Write(p []byte) (int, error) {
	accepted := len(p)
	return accepted, nil
}
// SeqGen is an endless reader that emits the repeating byte sequence
// 0, 1, 2, ..., 255, 0, 1, ...
type SeqGen struct {
	i int // value of the next byte to emit
}

// Read fills v with consecutive byte values, continuing where the previous
// call stopped. It always fills the whole buffer and never returns an error.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++
	}
	// Remember the position; without this every call would restart at 0 and
	// the stream would only be continuous for buffer sizes that are multiples
	// of 256.
	s.i = int(b)
	return len(v), nil
}
// Rand is an endless reader of deterministic pseudo-random bytes.
type Rand struct {
	// uses PCG (http://www.pcg-random.org/)
	state uint64
	inc   uint64
}

// pcgmult64 is the multiplier of the 64-bit state update step.
const pcgmult64 = 6364136223846793005

// NewRand returns a generator whose output is fully determined by seed.
// The seed is used both to derive the increment and to perturb the state.
func NewRand(seed uint64) *Rand {
	state := uint64(0)
	inc := uint64(seed<<1) | 1
	state = state*pcgmult64 + (inc | 1)
	state += uint64(seed)
	state = state*pcgmult64 + (inc | 1)
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// next advances the state by one step and returns 32 output bits derived
// from the previous state (xor-shift followed by a data-dependent rotate).
func (r *Rand) next() uint32 {
	old := r.state
	r.state = r.state*pcgmult64 + (r.inc | 1)
	xorshifted := uint32(((old >> 18) ^ old) >> 27)
	rot := uint32(old >> 59)
	return (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
}

// Read fills v with pseudo-random bytes, four at a time in little-endian
// order. It always fills the whole buffer and never returns an error.
//
// Buffers whose length is not a multiple of four are supported: the final
// one to three bytes are taken from the low bytes of one extra generated
// word, whose remaining bytes are discarded.
func (r *Rand) Read(v []byte) (int, error) {
	w := v
	for ; len(w) >= 4; w = w[4:] {
		binary.LittleEndian.PutUint32(w, r.next())
	}
	if len(w) > 0 {
		// Writing a full word here would run past the end of the buffer.
		var tail [4]byte
		binary.LittleEndian.PutUint32(tail[:], r.next())
		copy(w, tail[:])
	}
	return len(v), nil
}
// wcounter is an io.Writer that passes every write through to out and keeps
// a running total of the bytes out reported as written.
type wcounter struct {
	n   int       // total bytes accepted by out so far
	out io.Writer // destination of all writes
}

// Write forwards p to the wrapped writer, adds the number of bytes it
// accepted to the running total, and returns the wrapped writer's result
// unchanged (including short writes and errors).
func (w *wcounter) Write(p []byte) (n int, err error) {
	accepted, werr := w.out.Write(p)
	w.n = w.n + accepted
	return accepted, werr
}
// main is the entry point of the compression benchmark tool.
//
// It parses the command-line flags, builds a reader chain (input file or
// stdin, optionally wrapped in a decompressor, or replaced by a synthetic
// generator) and a writer chain (output file, stdout or a discard sink,
// wrapped in a byte counter and optionally a compressor), copies the reader
// into the writer and, with -stats, prints one tab-separated result line.
//
// Any setup or copy error is reported by panicking.
func main() {
	/*	defer func() {
			if p := recover(); p != nil {
				var msg string
				switch err := p.(type) {
				case error:
					msg = err.Error()
				case string:
					msg = err
				default:
					os.Stderr.WriteString("unknown type in panic")
					os.Exit(1)
				}
				os.Stderr.WriteString(msg)
				os.Exit(1)
			}
		}()
	*/
	rmode := "raw"
	wmode := "gzkp"
	wlevel := -1
	in := "-"
	out := "-"
	cpu := 0
	stats := false
	header := true
	flag.StringVar(&rmode, "r", rmode, "read mode (raw|flatekp|flatestd|gzkp|bgzf|pgzip|cgzip|gzstd|zero|seq|rand)")
	flag.StringVar(&wmode, "w", wmode, "write mode (raw|flatekp|flatestd|gzkp|bgzf|pgzip|pargzip|gzstd|cgzip|dedup|snappy|none)")
	// The defaults must be the values of in/out themselves ("-"); passing
	// rmode here made both flags default to a file literally named "raw".
	flag.StringVar(&in, "in", in, "input file name, default is '-', stdin")
	flag.StringVar(&out, "out", out, "output file name, default is '-', stdout; '*' discards the output")
	flag.IntVar(&wlevel, "l", wlevel, "compression level (-2|-1|0..9)")
	flag.IntVar(&cpu, "cpu", cpu, "GOMAXPROCS number (0|1...)")
	flag.BoolVar(&stats, "stats", false, "show stats")
	flag.BoolVar(&header, "header", true, "show stats header")
	flag.Parse()
	// Positional arguments are not used; show the flag help when any are given.
	if flag.NArg() > 0 {
		flag.PrintDefaults()
	}
	if cpu <= 0 {
		cpu = runtime.NumCPU()
	}
	runtime.GOMAXPROCS(cpu)
	if wlevel < -2 || 9 < wlevel {
		panic("compression level -l=x must be (-2,0..9)")
	}

	// Input: stdin, or the named file behind a readahead reader.
	var err error
	var r io.Reader
	if in == "-" {
		r = os.Stdin
	} else {
		r, err = os.Open(in)
		if err != nil {
			panic(err)
		}
		// NOTE(review): 10 and 10<<20 are presumably buffer count and buffer
		// size; confirm against the readahead package.
		r, err = readahead.NewReaderSize(r, 10, 10<<20)
		if err != nil {
			panic(err)
		}
	}

	// Read mode: either replace the input with a generator (source == true)
	// or wrap it in the selected decompressor.
	var source bool
	switch rmode {
	case "zero":
		// NoOp writes what the original buffer contained unchanged.
		// As that buffer is initialized with 0 and not changed,
		// NoOp is usable as a very fast zero-reader.
		r = NoOp{}
		source = true
	case "seq":
		r = &SeqGen{}
		source = true
	case "rand":
		r = NewRand(0xdeadbeef)
		source = true
	case "raw":
	case "gzkp":
		var gzr *gzkp.Reader
		if gzr, err = gzkp.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "bgzf":
		var gzr *bgzf.Reader
		if gzr, err = bgzf.NewReader(r, cpu); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "pgzip":
		var gzr *pgz.Reader
		if gzr, err = pgz.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "cgzip":
		var gzr io.ReadCloser
		if gzr, err = cgzip.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "gzstd":
		var gzr *gzstd.Reader
		if gzr, err = gzstd.NewReader(r); err == nil {
			defer gzr.Close()
			r = gzr
		}
	case "flatekp":
		fr := flkp.NewReader(r)
		defer fr.Close()
		r = fr
	case "flatestd":
		fr := flstd.NewReader(r)
		defer fr.Close()
		r = fr
	default:
		panic("read mode -r=x must be (raw|flatekp|flatestd|gzkp|bgzf|pgzip|cgzip|gzstd|zero|seq|rand)")
	}
	if err != nil {
		panic(err)
	}

	// Output: stdout, a discard sink ("*"), or a buffered file. The file and
	// its buffer are remembered so they can be flushed and closed after the
	// copy.
	var w io.Writer
	var outFile *os.File
	var outBuf *bufio.Writer
	if out == "-" {
		w = os.Stdout
	} else if out == "*" {
		w = ioutil.Discard
		out = "discard"
	} else {
		f, err := os.Create(out)
		if err != nil {
			panic(err)
		}
		outFile = f
		outBuf = bufio.NewWriter(f)
		w = outBuf
	}
	// Count the bytes that reach the output, i.e. the compressed size.
	outSize := &wcounter{out: w}
	w = outSize

	// Write mode: either drop everything (sink == true) or wrap the counted
	// output in the selected compressor.
	var sink bool
	switch wmode {
	case "none":
		w = NoOp{}
		sink = true
	case "raw":
	case "gzkp":
		var gzw *gzkp.Writer
		if gzw, err = gzkp.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "pgzip":
		var gzw *pgz.Writer
		if gzw, err = pgz.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "bgzf":
		var gzw *bgzf.Writer
		if gzw, err = bgzf.NewWriterLevel(w, wlevel, cpu); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "pargzip":
		// This mode does not use the -l level.
		var gzw *pargzip.Writer
		gzw = pargzip.NewWriter(w)
		gzw.UseSystemGzip = false
		defer gzw.Close()
		w = gzw
	case "cgzip":
		var gzw *cgzip.Writer
		if gzw, err = cgzip.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "gzstd":
		var gzw *gzstd.Writer
		if gzw, err = gzstd.NewWriterLevel(w, wlevel); err == nil {
			defer gzw.Close()
			w = gzw
		}
	case "dedup":
		var ddw dedup.Writer
		if ddw, err = dedup.NewStreamWriter(w, dedup.ModeDynamic, 1024, 0); err == nil {
			defer ddw.Close()
			w = ddw
		}
	case "snappy":
		sw := snappy.NewWriter(w)
		w = sw
		/*	case "lzo1x":
			sw := lzo.NewWriter(w, wlevel)
			w = sw*/
	case "flatekp":
		var fw *flkp.Writer
		if fw, err = flkp.NewWriter(w, wlevel); err == nil {
			defer fw.Close()
			w = fw
		}
	case "flatestd":
		var fw *flstd.Writer
		if fw, err = flstd.NewWriter(w, wlevel); err == nil {
			defer fw.Close()
			w = fw
		}
	default:
		panic("write mode -w=x must be (raw|flatekp|flatestd|gzkp|bgzf|pgzip|pargzip|gzstd|cgzip|dedup|snappy|none)")
	}
	if err != nil {
		panic(err)
	}

	// A generator feeding the no-op sink would never finish and measures nothing.
	if source && sink {
		return
	}

	inSize := int64(0)
	start := time.Now()
	func() {
		// Registered first so that it runs last, after the compressor below
		// has been closed and has emitted its trailing data: push the
		// buffered bytes to the output file and close it. Without this the
		// end of the output file was silently lost.
		defer func() {
			if outBuf == nil {
				return
			}
			if err := outBuf.Flush(); err != nil {
				panic(err)
			}
			if err := outFile.Close(); err != nil {
				panic(err)
			}
		}()
		// Close both ends when the copy is done so that the time needed to
		// finish the stream is part of the measurement. Deferred calls run in
		// reverse order, so the writer is closed before the reader.
		for _, mc := range []interface{}{r, w} {
			if c, ok := mc.(io.Closer); ok {
				defer c.Close()
			}
		}
		nr, err := io.Copy(w, r)
		inSize += nr
		if err != nil && err != io.EOF {
			panic(err)
		}
	}()
	if stats {
		elapsed := time.Since(start)
		if header {
			fmt.Printf("file\tin\tout\tlevel\tcpu\tinsize\toutsize\tmillis\tmb/s\n")
		}
		// Throughput is measured on the uncompressed side (bytes copied from r).
		mbpersec := (float64(inSize) / (1024 * 1024)) / (float64(elapsed) / (float64(time.Second)))
		fmt.Printf("%s\t%s\t%s\t%d\t%d\t%d\t%d\t%d\t%.02f\n", in, rmode, wmode, wlevel, cpu, inSize, outSize.n, elapsed/time.Millisecond, mbpersec)
	}
}
REM Benchmark script: runs the compress tool on the file given as %1 with
REM several write modes at compression levels 1..9 and -2, appending one
REM stats line per run to results.txt. -out=* discards the compressed data.
REM Blocks of runs are separated by an empty line in results.txt.

REM First run is not redirected to results.txt, so its stats line goes to
REM the console (presumably a warm-up pass over the input file).
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=-2
SET LEVEL=1
REM Only this run prints the column header (-header=true).
compress -in=%1 -out=* -stats -header=true -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=2
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=3
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=4
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=5
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=6
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=7
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=8
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
SET LEVEL=9
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzstd" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="bgzf" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="cgzip" -l=%LEVEL% >>results.txt
REM Level -2 is only run for the gzkp and pgzip write modes.
SET LEVEL=-2
echo.>>results.txt
compress -in=%1 -out=* -stats -header=false -w="gzkp" -l=%LEVEL% >>results.txt
compress -in=%1 -out=* -stats -header=false -w="pgzip" -l=%LEVEL% >>results.txt
REM The pargzip write mode in compress.go does not use the level, so it is
REM run once with a fixed -l=0.
compress -in=%1 -out=* -stats -header=false -w="pargzip" -l=0 >>results.txt
echo.>>results.txt
REM Benchmark script for a single write mode: rebuilds compress.go, then runs
REM the file given as %1 through write mode %2 at level -2 and levels 1..9,
REM appending one stats line per run to results.txt. -out=* discards the
REM compressed data. Only the first run prints the column header.
go build compress.go
SET LEVEL=-2
compress -in=%1 -out=* -stats -header=true -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=1
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=2
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=3
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=4
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=5
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=6
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=7
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=8
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
SET LEVEL=9
compress -in=%1 -out=* -stats -header=false -w=%2 -l=%LEVEL% >>results.txt
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.