Skip to content

Instantly share code, notes, and snippets.

@arnehormann
Last active August 29, 2015 14:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arnehormann/65421048f56ac108f6b5 to your computer and use it in GitHub Desktop.
Save arnehormann/65421048f56ac108f6b5 to your computer and use it in GitHub Desktop.
Check speed and compression of `compress/flate` and `compress/gzip` against the `github.com/klauspost/compress` variants. Please read the comment below!
package main
import (
"encoding/binary"
"flag"
"fmt"
"io"
"os"
"runtime"
"sort"
"strconv"
"strings"
"time"
flstd "compress/flate"
gzstd "compress/gzip"
flkp "github.com/klauspost/compress/flate"
gzkp "github.com/klauspost/compress/gzip"
pgz "github.com/klauspost/pgzip"
)
// readCloser combines an independent io.Reader and io.Closer into a single
// value, so the pair can be used wherever an io.ReadCloser is expected.
// Both methods are promoted from the embedded interfaces.
type readCloser struct {
	io.Reader
	io.Closer
}

// Compile-time check that *readCloser satisfies io.ReadCloser.
var _ io.ReadCloser = (*readCloser)(nil)
// StatReader wraps an io.Reader and keeps a running total of the bytes
// that were read through it.
type StatReader struct {
	R       io.Reader // wrapped reader
	N       int64     // number of bytes read so far
	NoClose bool      // when set, Close never closes R
}

// Compile-time check that *StatReader satisfies io.ReadCloser.
var _ io.ReadCloser = (*StatReader)(nil)

// mapReader adapts the (reader, error) result of a constructor: on error it
// returns (nil, err), otherwise r wrapped in a fresh StatReader.
func mapReader(r io.Reader, err error) (*StatReader, error) {
	if err == nil {
		return &StatReader{R: r}, nil
	}
	return nil, err
}

// Read forwards to the wrapped reader and adds the number of bytes it
// returned to N.
func (r *StatReader) Read(d []byte) (int, error) {
	count, readErr := r.R.Read(d)
	r.N += int64(count)
	return count, readErr
}

// Close closes the wrapped reader if it implements io.Closer and NoClose is
// not set; otherwise it does nothing and returns nil.
func (r *StatReader) Close() error {
	closer, closable := r.R.(io.Closer)
	if r.NoClose || !closable {
		return nil
	}
	return closer.Close()
}
// StatWriter wraps an io.Writer and keeps a running total of the bytes
// that were written through it.
type StatWriter struct {
	W       io.Writer // wrapped writer
	N       int64     // number of bytes written so far
	NoClose bool      // when set, Close never closes W
}

// Compile-time check that *StatWriter satisfies io.WriteCloser.
var _ io.WriteCloser = (*StatWriter)(nil)

// mapWriter adapts the (writer, error) result of a constructor: on error it
// returns (nil, err), otherwise w wrapped in a fresh StatWriter.
func mapWriter(w io.Writer, err error) (*StatWriter, error) {
	if err == nil {
		return &StatWriter{W: w}, nil
	}
	return nil, err
}

// Write forwards to the wrapped writer and adds the number of bytes it
// accepted to N.
func (w *StatWriter) Write(d []byte) (int, error) {
	count, writeErr := w.W.Write(d)
	w.N += int64(count)
	return count, writeErr
}

// Close closes the wrapped writer if it implements io.Closer and NoClose is
// not set; otherwise it does nothing and returns nil.
func (w *StatWriter) Close() error {
	closer, closable := w.W.(io.Closer)
	if w.NoClose || !closable {
		return nil
	}
	return closer.Close()
}
// NoOp is a storage-free data source and sink: reading from it yields an
// endless stream of zero bytes, writing to it discards the data.
type NoOp struct{}

// Read fills v with zero bytes and reports all of v as read. It never
// returns an error and never signals io.EOF.
func (n NoOp) Read(v []byte) (int, error) {
	// The buffer must be cleared explicitly: callers may hand in a buffer
	// that still holds data from an earlier use.
	for i := range v {
		v[i] = 0
	}
	return len(v), nil
}

// Write discards v and reports all of it as written.
func (n NoOp) Write(v []byte) (int, error) {
	return len(v), nil
}
// SeqGen is an endless reader producing the repeating byte sequence
// 0, 1, ..., 255, 0, 1, ...
type SeqGen struct {
	i int // value of the next byte to emit; only the low 8 bits are used
}

// Read fills v with the next len(v) bytes of the sequence and reports all of
// v as read. It never returns an error and never signals io.EOF.
func (s *SeqGen) Read(v []byte) (int, error) {
	b := byte(s.i)
	for i := range v {
		v[i] = b
		b++
	}
	// Remember where the sequence stopped so the next Read continues it
	// instead of restarting at 0.
	s.i = int(b)
	return len(v), nil
}
// Rand is an endless reader producing a deterministic pseudo random byte
// stream; the same seed always yields the same stream.
type Rand struct {
	// uses PCG (http://www.pcg-random.org/)
	state uint64
	inc   uint64
}

// pcgmult64 is the multiplier of the generator's 64 bit state update.
const pcgmult64 = 6364136223846793005

// NewRand returns a generator initialized from seed.
func NewRand(seed uint64) *Rand {
	state := uint64(0)
	inc := uint64(seed<<1) | 1
	state = state*pcgmult64 + (inc | 1)
	state += uint64(seed)
	state = state*pcgmult64 + (inc | 1)
	return &Rand{
		state: state,
		inc:   inc,
	}
}

// next advances the generator by one step and returns the 32 bit output
// derived from the previous state.
func (r *Rand) next() uint32 {
	old := r.state
	r.state = old*pcgmult64 + (r.inc | 1)
	xorshifted := uint32(((old >> 18) ^ old) >> 27)
	rot := uint32(old >> 59)
	return (xorshifted >> rot) | (xorshifted << ((-rot) & 31))
}

// Read fills v with pseudo random bytes and reports all of v as read. It
// never returns an error and never signals io.EOF.
//
// The stream is produced in little endian 32 bit words. When len(v) is not a
// multiple of 4, the unused bytes of the last generated word are discarded.
func (r *Rand) Read(v []byte) (int, error) {
	w := v
	for ; len(w) >= 4; w = w[4:] {
		binary.LittleEndian.PutUint32(w, r.next())
	}
	if len(w) > 0 {
		// Fewer than 4 bytes are left: generate a full word and copy only
		// what fits, instead of writing past the end of v.
		var tail [4]byte
		binary.LittleEndian.PutUint32(tail[:], r.next())
		copy(w, tail[:])
	}
	return len(v), nil
}
// Compile-time check that *NumBytes satisfies flag.Value.
var _ flag.Value = (*NumBytes)(nil)

// NumBytes is a byte count usable as a command line flag. It accepts plain
// integers, integers with a binary size postfix (k, m, g, t, p, e; powers of
// 1024, case insensitive), and the word "all", which is stored as 0.
type NumBytes int64

// String returns "all" for 0 (and for a nil receiver, which the flag package
// may pass), otherwise the decimal number of bytes.
func (n *NumBytes) String() string {
	if n == nil || *n == 0 {
		return "all"
	}
	return strconv.FormatInt(int64(*n), 10)
}

// Set parses v and stores the result in n. It returns an error if v has an
// unknown postfix, is not a valid integer, or does not fit into an int64
// after the postfix is applied.
func (n *NumBytes) Set(v string) error {
	if v == "all" {
		*n = 0
		return nil
	}
	const maxInt64 = 1<<63 - 1
	f := int64(1)
	if len(v) > 0 {
		hasPostfix := true
		switch v[len(v)-1] {
		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
			// plain number: the last character is part of the value
			hasPostfix = false
		case 'k', 'K':
			f = 1 << 10
		case 'm', 'M':
			f = 1 << 20
		case 'g', 'G':
			f = 1 << 30
		case 't', 'T':
			f = 1 << 40
		case 'p', 'P':
			f = 1 << 50
		case 'e', 'E':
			f = 1 << 60
		default:
			return fmt.Errorf("invalid byte size %q, available postfixes kilo..exa (k,m,g,t,p,e)", v)
		}
		if hasPostfix {
			v = v[:len(v)-1]
		}
	}
	i, err := strconv.ParseInt(v, 10, 64)
	if err != nil {
		return err
	}
	// reject values whose product with the postfix factor overflows int64
	if i > maxInt64/f || i < -maxInt64/f {
		return fmt.Errorf("byte size %d*%d does not fit into 64 bits", i, f)
	}
	*n = NumBytes(f * i)
	return nil
}
// sources maps the name of a data source (flag -s) to its constructor.
// The string argument is the input file name; only "file" uses it.
var sources = map[string]func(string) (*StatReader, error){
	// file reads from the named file; "-" selects standard input, which is
	// marked NoClose.
	"file": func(path string) (*StatReader, error) {
		if path != "-" {
			return mapReader(os.Open(path))
		}
		stdin := &StatReader{
			R:       os.Stdin,
			NoClose: true,
		}
		return stdin, nil
	},
	// rand is a pseudo random stream with a fixed seed.
	"rand": func(string) (*StatReader, error) {
		return mapReader(NewRand(0xdeadbeef), nil)
	},
	// seq is a SeqGen stream.
	"seq": func(string) (*StatReader, error) {
		return mapReader(&SeqGen{}, nil)
	},
	// zero is a NoOp stream.
	"zero": func(string) (*StatReader, error) {
		return mapReader(NoOp{}, nil)
	},
}
// extractors maps the name of a decompressor (flag -x) to a constructor
// that wraps the given reader. Constructors that can fail hand their error
// through mapReader.
var extractors = map[string]func(*StatReader) (*StatReader, error){
	// none passes data through unchanged; closing it does not close in.
	"none": func(in *StatReader) (*StatReader, error) {
		return &StatReader{R: in, NoClose: true}, nil
	},
	"flatestd": func(in *StatReader) (*StatReader, error) {
		return mapReader(flstd.NewReader(in), nil)
	},
	"flatekp": func(in *StatReader) (*StatReader, error) {
		return mapReader(flkp.NewReader(in), nil)
	},
	"gzstd": func(in *StatReader) (*StatReader, error) {
		return mapReader(gzstd.NewReader(in))
	},
	"gzkp": func(in *StatReader) (*StatReader, error) {
		return mapReader(gzkp.NewReader(in))
	},
	"pgzip": func(in *StatReader) (*StatReader, error) {
		return mapReader(pgz.NewReader(in))
	},
}
// compressors maps the name of a compressor (flag -c) to a constructor that
// wraps the given writer. The int argument is the compression level.
var compressors = map[string]func(*StatWriter, int) (*StatWriter, error){
	// none passes data through unchanged and ignores the level; closing it
	// does not close out.
	"none": func(out *StatWriter, level int) (*StatWriter, error) {
		return &StatWriter{W: out, NoClose: true}, nil
	},
	"flatestd": func(out *StatWriter, level int) (*StatWriter, error) {
		return mapWriter(flstd.NewWriter(out, level))
	},
	"flatekp": func(out *StatWriter, level int) (*StatWriter, error) {
		return mapWriter(flkp.NewWriter(out, level))
	},
	"gzstd": func(out *StatWriter, level int) (*StatWriter, error) {
		return mapWriter(gzstd.NewWriterLevel(out, level))
	},
	"gzkp": func(out *StatWriter, level int) (*StatWriter, error) {
		return mapWriter(gzkp.NewWriterLevel(out, level))
	},
	"pgzip": func(out *StatWriter, level int) (*StatWriter, error) {
		return mapWriter(pgz.NewWriterLevel(out, level))
	},
}
// sinks maps the name of a destination (flag -d) to its constructor.
// The string argument is the output file name; only "file" uses it.
var sinks = map[string]func(string) (*StatWriter, error){
	// file creates (or truncates) the named file; "-" selects standard
	// output, which is marked NoClose.
	"file": func(path string) (*StatWriter, error) {
		if path != "-" {
			return mapWriter(os.Create(path))
		}
		stdout := &StatWriter{
			W:       os.Stdout,
			NoClose: true,
		}
		return stdout, nil
	},
	// none discards everything written to it.
	"none": func(string) (*StatWriter, error) {
		return mapWriter(NoOp{}, nil)
	},
}
// Comma separated, sorted lists of the keys of the sources, extractors,
// compressors and sinks tables; filled in by init and shown in the flag
// descriptions.
var (
	sourceOpts     string
	extractorOpts  string
	compressorOpts string
	sinkOpts       string
)

// init builds the option lists from the keys of the lookup tables.
func init() {
	// joinSorted sorts names in place and joins them with commas.
	joinSorted := func(names []string) string {
		sort.Strings(names)
		return strings.Join(names, ",")
	}

	var names []string
	for name := range sources {
		names = append(names, name)
	}
	sourceOpts = joinSorted(names)

	names = names[:0]
	for name := range extractors {
		names = append(names, name)
	}
	extractorOpts = joinSorted(names)

	names = names[:0]
	for name := range compressors {
		names = append(names, name)
	}
	compressorOpts = joinSorted(names)

	names = names[:0]
	for name := range sinks {
		names = append(names, name)
	}
	sinkOpts = joinSorted(names)
}
// helpAndQuit prints an optional error message, the help text and the flag
// defaults of fs to standard error and terminates the program with exit
// code -1. It does not return.
//
// v selects the error message:
//   - error:  its Error() text
//   - string: the string itself
//   - bool:   true reports the non-flag arguments left in fs as unknown
//     flags; false means help was requested and no error is printed
//   - anything else: a generic message
func helpAndQuit(fs *flag.FlagSet, v interface{}) {
	var msg string
	hasErr := true
	switch err := v.(type) {
	case error:
		msg = err.Error()
	case string:
		msg = err
	case bool:
		// from help; no error
		if err {
			// The arguments were parsed by fs, not by the global flag set,
			// so the leftovers have to be taken from fs.
			msg = "unknown flags: \n\t" + strings.Join(fs.Args(), "\n\t")
		} else {
			hasErr = false
		}
	default:
		msg = "unknown type in panic"
	}
	if hasErr {
		// msg is passed as an argument: it may contain '%' and must not be
		// interpreted as a format string.
		fmt.Fprintf(os.Stderr, "ERROR: %s\n\n", msg)
	}
	fmt.Fprintln(os.Stderr, os.Args[0]+`:
This program helps to test the differences between implementations of
flate and gzip in the standard library and in github.com/klauspost/compress.
By default, it reads data from standard input and writes it to standard
output compressed with github.com/klauspost/compress/gzip.
Data is processed in a pipeline; each step is configurable and tracks the
number of bytes processed.
source(-s) => extract(-x) => compress(-c) => destination(-d)
The source can be a file (-s=file -i=FILENAME), the standard input stream
(-s=file -i=-), a repeating sequence of bytes between 0 and 255 (-s=seq),
a sequence of random bytes repeating each run (-s=rand), or an endless
stream of zeroes (-s=zero).
The amount of data read can be limited (-n=NUMBYTES).
The destination is either a file (-d=file -o=FILENAME), the standard output
stream (-d=file -o=-), or it is discarded (-d=none).
Compression (-c=...) and decompression/extraction (-x=...) can be disabled
(-c=none / -x=none) or set to a compression package.
- flatestd: compress/flate
- flatekp: github.com/klauspost/compress/flate
- gzstd: compress/gzip
- gzkp: github.com/klauspost/compress/gzip
- pgzip: github.com/klauspost/pgzip
For compression, the level is configurable, e.g. for fastest (-l=1) or for
best compression (-l=9).
The number of cpu cores set is also configurable (-cpus=2).
Statistics can be shown with (-stats), (-noheaders) suppresses the headers.
Available parameters:`)
	fs.PrintDefaults()
	os.Exit(-1)
}
// main builds the pipeline
//
//	source(-s) => extract(-x) => compress(-c) => destination(-d)
//
// from the command line flags, copies all data through it and, if requested
// with -stats, prints byte counts and throughput to standard error.
// Configuration and I/O errors are reported by panicking.
func main() {
	var (
		r   *StatReader // source
		x   *StatReader // extractor reading from r
		c   *StatWriter // compressor writing to w
		w   *StatWriter // destination
		err error
	)
	// flag configuration variables
	var (
		infile    = "-"
		outfile   = "-"
		src       = "file"
		dest      = "file"
		extmode   = "none"
		compmode  = "gzkp"
		complevel = -1
		rmax      = NumBytes(0) // 0 means "all": no limit
		cpus      = runtime.GOMAXPROCS(0)
		stats     = false
		noHeaders = false
		help      = false
	)
	fs := flag.NewFlagSet(os.Args[0], flag.PanicOnError)
	fs.StringVar(&infile, "i", infile, "input file; For stdin or non-file: '','-'")
	fs.StringVar(&outfile, "o", outfile, "output file; For stdout or non-file: '','-'")
	fs.StringVar(&src, "s", src, "source; requires '-i' for 'file'. One of "+sourceOpts)
	fs.StringVar(&dest, "d", dest, "destination; requires '-o' for 'file'. One of "+sinkOpts)
	fs.StringVar(&extmode, "x", extmode, "extract; One of "+extractorOpts)
	fs.StringVar(&compmode, "c", compmode, "compress; One of "+compressorOpts)
	fs.IntVar(&complevel, "l", complevel, "compression level (-1|0..9)")
	fs.Var(&rmax, "n", "max bytes read, 0 for all")
	fs.IntVar(&cpus, "cpus", cpus, "number of cpu cores used (< 0 for all)")
	fs.BoolVar(&stats, "stats", stats, "show stats")
	fs.BoolVar(&noHeaders, "noheaders", noHeaders, "suppress stats headers (ignored when stats is not set)")
	fs.BoolVar(&help, "h", help, "show this help text")
	// with flag.PanicOnError, Parse panics instead of returning an error
	fs.Parse(os.Args[1:])
	if unknownFlags := fs.NArg() > 0; unknownFlags || help {
		helpAndQuit(fs, unknownFlags)
	}
	if infile == "" {
		// map to stdin
		infile = "-"
	}
	if outfile == "" {
		// map to stdout
		outfile = "-"
	}
	if compmode == "none" {
		complevel = 0
	}
	if cpus < 0 {
		cpus = runtime.NumCPU()
	}
	runtime.GOMAXPROCS(cpus)
	// basic sanity checks
	if src != "file" && infile != "-" {
		panic("input file must be '-' for non file source")
	}
	if dest != "file" && outfile != "-" {
		panic("output file must be '-' for non file destination")
	}
	if complevel < -1 || 9 < complevel {
		panic("compression level -l=x must be (-1,0..9)")
	}
	if rmax < 0 {
		panic("max bytes read is too small")
	}
	// Close all open readers and writers, also when a panic unwinds main.
	// The pointers are checked individually: stored in an io.Closer, a nil
	// *StatReader / *StatWriter would no longer compare equal to nil.
	// Errors are ignored here, this is best-effort cleanup.
	compressorClosed := false
	defer func() {
		if x != nil {
			x.Close()
		}
		if c != nil && !compressorClosed {
			c.Close()
		}
		if r != nil {
			r.Close()
		}
		if w != nil {
			w.Close()
		}
	}()
	if f, ok := sources[src]; ok {
		if r, err = f(infile); err != nil {
			panic("could not create reader: " + err.Error())
		}
	} else {
		panic("source reader of type '" + src + "' is unsupported")
	}
	if f, ok := extractors[extmode]; ok {
		if x, err = f(r); err != nil {
			panic("could not create extractor: " + err.Error())
		}
	} else {
		panic("source extractor of type '" + extmode + "' is unsupported")
	}
	if f, ok := sinks[dest]; ok {
		if w, err = f(outfile); err != nil {
			panic("could not create writer: " + err.Error())
		}
	} else {
		panic("destination writer of type '" + dest + "' is unsupported")
	}
	if f, ok := compressors[compmode]; ok {
		if c, err = f(w, complevel); err != nil {
			panic("could not create compressor: " + err.Error())
		}
	} else {
		panic("compressor of type '" + compmode + "' is unsupported")
	}
	// Optionally limit the data. The limit is applied to whatever x reads
	// from (the extractor output, or the source for -x=none), so the
	// extractor stays part of the pipeline.
	if rmax > 0 {
		inner := x.R
		limited := &io.LimitedReader{
			R: inner,
			N: int64(rmax),
		}
		if cl, ok := inner.(io.Closer); ok {
			// keep x.Close working on the reader that was wrapped
			x.R = &readCloser{
				Reader: limited,
				Closer: cl,
			}
		} else {
			x.R = limited
		}
	}
	start := time.Now()
	// adapted from io.Copy()
	buf := make([]byte, 32*1024)
	for {
		nr, er := x.Read(buf)
		if nr > 0 {
			nw, ew := c.Write(buf[:nr])
			if ew != nil {
				panic(ew)
			}
			if nr != nw {
				panic(io.ErrShortWrite)
			}
		}
		if er == io.EOF {
			break
		}
		if er != nil {
			panic(er)
		}
	}
	// Close the compressor before looking at the counters: data it still
	// buffers only reaches w on Close.
	compressorClosed = true
	if err = c.Close(); err != nil {
		panic("could not close compressor: " + err.Error())
	}
	if stats {
		took := time.Since(start)
		mbpsIn := (float64(r.N) / (1024 * 1024)) / took.Seconds()
		mbpsOut := (float64(w.N) / (1024 * 1024)) / took.Seconds()
		var format string
		if !noHeaders {
			format = "" +
				"type in\tfile in\tbytes in\t" +
				"type extract\tbytes extracted\t" +
				"type compress\tlevel\tbytes compressed\t" +
				"type out\tfile out\tbytes out\t" +
				"cpus\tmillis\t" +
				"mb/s in\tmb/s out\n"
		}
		format += "" +
			"%s\t%s\t%d\t" +
			"%s\t%d\t" +
			"%s\t%d\t%d\t" +
			"%s\t%s\t%d\t" +
			"%d\t%.03f\t" +
			"%.02f\t%.02f\n"
		fmt.Fprintf(os.Stderr, format,
			src, infile, r.N,
			extmode, x.N,
			compmode, complevel, c.N,
			dest, outfile, w.N,
			runtime.GOMAXPROCS(0), took.Seconds()*1000,
			mbpsIn, mbpsOut,
		)
	}
}
@arnehormann
Copy link
Author

UPDATE 2
Instructions below are for an old version. The program prints its help text with -h. It provides more detailed statistics and better control - and it's easier to adapt now.

It always works in a pipeline: read from a source, pass through a decompressor, pass through a compressor, write to a destination. Each step is configurable and tracks the number of bytes processed.


UPDATE 1
There's an adapted version friendlier to measurements on Windows, it can be found at https://gist.github.com/klauspost/00f7c9a19e56581f5ead


This is intended to be used with e.g. pv and time to measure the throughput.
It always reads from stdin and writes to stdout.
By default, it passes the raw input to klauspost gzip with default compression and writes the result to stdout.

Unknown arguments print a help text:

> # See available arguments by passing something unknown
> ./compress --help
Usage of ./compress:
  -l=-1: compression level (-1|0..9)
  -r="raw": read mode (raw|flatekp|flatestd|gzkp|gzstd|zero|seq|rand)
  -w="gzkp": write mode (raw|flatekp|flatestd|gzkp|gzstd|none)

Available producers / consumers:

  • raw: stdin / stdout
  • flatekp: stdin / stdout compressed with github.com/klauspost/compress/flate
  • flatestd: stdin / stdout compressed with compress/flate
  • gzkp: stdin / stdout compressed with github.com/klauspost/compress/gzip
  • gzstd: stdin / stdout compressed with compress/gzip
  • zero: an infinite stream of zero bytes
  • seq: a repeating stream of bytes 0..255
  • rand: a deterministic stream of random numbers (the same sequence each call)
  • none: data sink (/dev/null)

Some examples

# check speed of PRNG stream
./compress -r=rand -w=raw | pv > /dev/null

# check speed of PRNG stream compressed by klauspost gzip with level 9
./compress -r=rand -w=gzkp -l=9 | pv > /dev/null   

# check speed of PRNG stream compressed by stdlib gzip with level 9
./compress -r=rand -w=gzstd -l=9 | pv > /dev/null

# check speed of 0-stream compressed by stdlib flate with level 6
./compress -r=zero -w=flatestd -l=6 | pv > /dev/null

# same as above, but use external zero stream
./compress -r=raw -w=flatestd -l=6 < /dev/zero | pv > /dev/null

You can also pipe in a file and write it to .gz - and compare the contents and sizes after decompression.
Or you can do so on the fly. Here's an example for a file named testdata:

diff -q \
    <(./compress -r=raw -w=gzkp -l=9 < ./testdata | gzip -d) \
    <(gzip -c -d < ./testdata | ./compress -r=gzkp -w=raw)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment