Skip to content

Instantly share code, notes, and snippets.

@zhenjl
Last active April 21, 2016 12:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zhenjl/7560517 to your computer and use it in GitHub Desktop.
Save zhenjl/7560517 to your computer and use it in GitHub Desktop.
package main
import (
"bytes"
"code.google.com/p/snappy-go/snappy"
"github.com/cznic/zappy"
"compress/gzip"
"compress/lzw"
"flag"
"fmt"
lz4 "github.com/reducedb/go-lz4"
"io"
"io/ioutil"
"log"
"os"
"runtime"
"runtime/pprof"
"strings"
"time"
)
// paramList is a list of strings used as a repeatable command-line flag
// value: its String and Set methods let it be registered with flag.Var.
type paramList []string
// codecFunc is the common signature shared by every compress/uncompress
// routine in this file: it receives the (in, out) byte slices and returns
// the produced bytes or an error.
type codecFunc func([]byte, []byte) ([]byte, error)
// Buffer is a fixed-size byte buffer layered over a caller-supplied slice.
// It never grows or reallocates: writes fail once the slice is full.
// Read and Write share a single cursor, so a Buffer is meant to be used
// either as a reader or as a writer, not both at once.
type Buffer struct {
	b []byte // backing storage handed to NewBuffer
	i int    // cursor: offset of the next byte to read or write
}

// NewBuffer returns a Buffer that uses p as its storage, with the cursor at
// the start of p.
func NewBuffer(p []byte) *Buffer {
	return &Buffer{b: p}
}

// Bytes returns the portion of the storage that lies before the cursor,
// i.e. everything written so far when the Buffer is used as a writer.
func (buf *Buffer) Bytes() []byte {
	return buf.b[:buf.i]
}

// Read copies up to len(p) bytes from the cursor into p and advances the
// cursor. It returns io.EOF when nothing is left, and also alongside the
// final bytes when fewer than len(p) bytes remained.
func (buf *Buffer) Read(p []byte) (n int, err error) {
	remaining := len(buf.b) - buf.i
	if remaining == 0 {
		return 0, io.EOF
	}
	n = copy(p, buf.b[buf.i:])
	buf.i += n
	if remaining < len(p) {
		return n, io.EOF
	}
	return n, nil
}

// Write copies p into the storage at the cursor and advances the cursor.
// If p does not fit, as many bytes as fit are stored and the returned count
// reflects exactly that number together with a non-nil error; the cursor
// never moves past the end of the storage.
func (buf *Buffer) Write(p []byte) (n int, err error) {
	remaining := len(buf.b) - buf.i
	if remaining == 0 {
		return 0, fmt.Errorf("No more space")
	}
	// copy stops at the shorter of the two slices, so n is the number of
	// bytes that actually fit.
	n = copy(buf.b[buf.i:], p)
	buf.i += n
	if n < len(p) {
		return n, fmt.Errorf("Not enough space")
	}
	return n, nil
}
var (
// filesParam, dirsParam and codecsParam collect the values given to the
// -file, -dir and -codec flags registered in init.
filesParam, dirsParam, codecsParam paramList
// pprofParam holds the -pprof flag; it is passed to compress/uncompress
// to switch CPU profiling on.
pprofParam bool
// files is the list of input paths; main fills it in and testCodecs uses
// it to label each line of output.
files []string
)
// String renders the accumulated values using fmt's default formatting of
// the underlying list (for example "[a b]").
func (p *paramList) String() string {
	return fmt.Sprintf("%v", *p)
}
// Set splits value on commas and appends every piece to the list, so the
// flag may be repeated and each occurrence may carry several values.
// It always returns nil.
func (p *paramList) Set(value string) error {
	*p = append(*p, strings.Split(value, ",")...)
	return nil
}
// init registers the command-line flags (-pprof, -file, -dir, -codec) on the
// default flag set; main later calls flag.Parse to fill the bound variables.
func init() {
	flag.BoolVar(&pprofParam, "pprof", false, "Whether or not to write pprof output.")
	flag.Var(&filesParam, "file", "Files to process. There can be multiple of this, or comma separated list.")
	flag.Var(&dirsParam, "dir", "The directory containing a list of files. There can be multiple of this, or comma separated list.")
	// The accepted codec name is "lz4" (see the switch in testCodecs); the
	// help text previously advertised a non-existent "lz3".
	flag.Var(&codecsParam, "codec", "The codec to use: lzw, lz4, zappy, snappy, and gzip. There can be multiple of this, or comma separated list.")
}
// readFile returns the full contents of the file at path. When the name ends
// in ".gz" the contents are passed through a gzip reader first, so the
// decompressed bytes are returned. Any open, gzip-header or read error is
// returned unchanged.
func readFile(path string) ([]byte, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// Plain files are read as-is.
	if !strings.HasSuffix(path, ".gz") {
		return ioutil.ReadAll(f)
	}

	zr, err := gzip.NewReader(f)
	if err != nil {
		return nil, err
	}
	defer zr.Close()
	return ioutil.ReadAll(zr)
}
// getDirOfFiles lists the entries directly inside the directory path and
// returns them as "path/name" strings, in the order ioutil.ReadDir reports
// them. Sub-directories are skipped: every returned path is later opened and
// read as a file, which would fail for a directory and abort the whole run.
// It returns the ReadDir error if the directory cannot be listed.
func getDirOfFiles(path string) ([]string, error) {
	entries, err := ioutil.ReadDir(path)
	if err != nil {
		return nil, err
	}
	filenames := make([]string, 0, len(entries))
	for _, e := range entries {
		if e.IsDir() {
			continue
		}
		filenames = append(filenames, path+"/"+e.Name())
	}
	return filenames, nil
}
// loadFromFiles reads every path in files via readFile, printing a progress
// line for each one. It returns the contents in the same order as files,
// the length of the largest content, and the first read error encountered
// (in which case the other results are nil and 0).
func loadFromFiles(files []string) ([][]byte, int, error) {
	var largest int
	contents := make([][]byte, 0, len(files))
	for i := range files {
		name := files[i]
		fmt.Printf("Processing %s...", name)
		buf, err := readFile(name)
		if err != nil {
			return nil, 0, err
		}
		if n := len(buf); n > largest {
			largest = n
		}
		contents = append(contents, buf)
		fmt.Println("done.")
	}
	return contents, largest, nil
}
// getListOfFiles builds the complete list of input paths: first the entries
// of every directory named by -dir (in flag order), then the paths given by
// -file. A directory that cannot be listed terminates the program via
// log.Fatal.
func getListOfFiles() []string {
	list := make([]string, 0, 10)
	for i := 0; i < len(dirsParam); i++ {
		entries, err := getDirOfFiles(dirsParam[i])
		if err != nil {
			log.Fatal(err)
		}
		list = append(list, entries...)
	}
	return append(list, filesParam...)
}
// compress runs codec(in, out) and measures how long the whole call took.
//
// It returns the elapsed time in nanoseconds, the bytes produced by codec and
// the error codec reported. (The codec error used to be discarded, so callers
// could never see a failed compression.)
//
// When prof is true a CPU profile covering the call is written to
// "cpu.compress.pprof"; failing to create the file or to start the profile
// terminates the program. Note that the timer starts before the profile is
// set up, so the reported duration includes that setup when prof is true.
func compress(codec codecFunc, in, out []byte, prof bool) (duration int, ret []byte, err error) {
	start := time.Now()
	if prof {
		f, e := os.Create("cpu.compress.pprof")
		if e != nil {
			log.Fatal(e)
		}
		defer f.Close()
		if e := pprof.StartCPUProfile(f); e != nil {
			log.Fatal(e)
		}
	}
	ret, err = codec(in, out)
	elapsed := time.Since(start).Nanoseconds()
	if prof {
		pprof.StopCPUProfile()
	}
	return int(elapsed), ret, err
}
// uncompress runs codec(in, out) and measures how long the whole call took.
//
// It returns the elapsed time in nanoseconds, the bytes produced by codec and
// the error codec reported. (The codec error used to be discarded, so callers
// could never see a failed decompression.)
//
// When prof is true a CPU profile covering the call is written to
// "cpu.uncompress.pprof"; failing to create the file or to start the profile
// terminates the program. Note that the timer starts before the profile is
// set up, so the reported duration includes that setup when prof is true.
func uncompress(codec codecFunc, in, out []byte, prof bool) (duration int, ret []byte, err error) {
	start := time.Now()
	if prof {
		f, e := os.Create("cpu.uncompress.pprof")
		if e != nil {
			log.Fatal(e)
		}
		defer f.Close()
		if e := pprof.StartCPUProfile(f); e != nil {
			log.Fatal(e)
		}
	}
	ret, err = codec(in, out)
	elapsed := time.Since(start).Nanoseconds()
	if prof {
		pprof.StopCPUProfile()
	}
	return int(elapsed), ret, err
}
func testCodecs(data [][]byte, max int, output bool) error {
compdata := make([]byte, max+max/3)
decompdata := make([]byte, max)
var (
compOut, decompOut []byte
err error
compTime, decompTime int
)
for i, in := range data {
for _, codec := range codecsParam {
switch codec {
case "lzw":
compTime, compOut, err = compress(lzwCompress, in, compdata, pprofParam)
if err != nil {
return err
}
decompTime, decompOut, err = uncompress(lzwUncompress, compOut, decompdata, pprofParam)
if err != nil {
return err
}
case "lz4":
compTime, compOut, err = compress(lz4Compress, in, compdata, pprofParam)
if err != nil {
return err
}
decompTime, decompOut, err = uncompress(lz4Uncompress, compOut, decompdata, pprofParam)
if err != nil {
return err
}
case "zappy":
compTime, compOut, err = compress(zappyCompress, in, compdata, pprofParam)
if err != nil {
return err
}
decompTime, decompOut, err = uncompress(zappyUncompress, compOut, decompdata, pprofParam)
if err != nil {
return err
}
case "snappy":
compTime, compOut, err = compress(snappyCompress, in, compdata, pprofParam)
if err != nil {
return err
}
decompTime, decompOut, err = uncompress(snappyUncompress, compOut, decompdata, pprofParam)
if err != nil {
return err
}
case "gzip":
compTime, compOut, err = compress(gzipCompress, in, compdata, pprofParam)
if err != nil {
return err
}
decompTime, decompOut, err = uncompress(gzipUncompress, compOut, decompdata, pprofParam)
if err != nil {
return err
}
default:
return fmt.Errorf("unknown codec %s", codec)
}
if !bytes.Equal(in, decompOut) {
return fmt.Errorf("roundtrip mismatch, size in = %d, size comp = %d, size decomp = %d\n", len(in), len(compOut), len(decompOut))
}
if output {
fmt.Printf("%-8s\t%-25s\t%2.2f%%\t%10d\t%5.2f\t%10d\t%5.2f\n", codec, files[i], float64(len(compOut))/float64(len(in))*100, compTime, (float64(len(in)) / (float64(compTime) / 1e9) / 1e6), decompTime, (float64(len(in)) / (float64(decompTime) / 1e9) / 1e6))
//fmt.Printf("%-8s\t%-25s\t%2.2f%%\t%10d ns\t%5.2f MB/s\t%10d ns\t%5.2f MB/s\n", codec, files[i], float64(len(compOut))/float64(len(in))*100, compTime, (float64(len(in)) / (float64(compTime) / 1e9) / 1e6), decompTime, (float64(len(in)) / (float64(decompTime) / 1e9) / 1e6))
}
runtime.GC()
}
}
return nil
}
// lz4Compress adapts lz4.Encode to the codecFunc signature by swapping the
// argument order: out is passed first, in second.
// NOTE(review): presumably out serves as the destination buffer — confirm
// against the go-lz4 documentation.
func lz4Compress(in, out []byte) ([]byte, error) {
return lz4.Encode(out, in)
}
// lz4Uncompress adapts lz4.Decode to the codecFunc signature by swapping the
// argument order: out is passed first, in second.
// NOTE(review): presumably out serves as the destination buffer — confirm
// against the go-lz4 documentation.
func lz4Uncompress(in, out []byte) ([]byte, error) {
return lz4.Decode(out, in)
}
// zappyCompress adapts zappy.Encode to the codecFunc signature by swapping
// the argument order: out is passed first, in second.
// NOTE(review): presumably out serves as the destination buffer — confirm
// against the zappy documentation.
func zappyCompress(in, out []byte) ([]byte, error) {
return zappy.Encode(out, in)
}
// zappyUncompress adapts zappy.Decode to the codecFunc signature by swapping
// the argument order: out is passed first, in second.
// NOTE(review): presumably out serves as the destination buffer — confirm
// against the zappy documentation.
func zappyUncompress(in, out []byte) ([]byte, error) {
return zappy.Decode(out, in)
}
// snappyCompress adapts snappy.Encode to the codecFunc signature by swapping
// the argument order: out is passed first, in second.
// NOTE(review): presumably out serves as the destination buffer — confirm
// against the snappy-go documentation.
func snappyCompress(in, out []byte) ([]byte, error) {
return snappy.Encode(out, in)
}
// snappyUncompress adapts snappy.Decode to the codecFunc signature by
// swapping the argument order: out is passed first, in second.
// NOTE(review): presumably out serves as the destination buffer — confirm
// against the snappy-go documentation.
func snappyUncompress(in, out []byte) ([]byte, error) {
return snappy.Decode(out, in)
}
// gzipCompress gzip-compresses in into the fixed-size buffer out and returns
// the slice of out that holds the compressed stream.
//
// The gzip writer buffers data and only emits the final block and trailer on
// Close, so the Close error must be checked: ignoring it would hand back a
// truncated stream with a nil error whenever out is too small. On any error
// the returned slice is nil.
func gzipCompress(in, out []byte) ([]byte, error) {
	b := NewBuffer(out)
	w := gzip.NewWriter(b)
	_, err := w.Write(in)
	// Always close the writer, but report the earliest failure.
	if cerr := w.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return nil, err
	}
	return b.Bytes(), nil
}
// gzipUncompress decodes the gzip stream in into out and returns the filled
// prefix of out.
//
// Reading stops at io.EOF or as soon as a Read yields zero bytes (which is
// also what happens once out is full). A gzip header error, or a non-EOF
// read error seen when no more bytes arrive, is returned with a nil slice.
func gzipUncompress(in, out []byte) ([]byte, error) {
	zr, openErr := gzip.NewReader(NewBuffer(in))
	if openErr != nil {
		return nil, openErr
	}
	defer zr.Close()

	filled := 0
	for {
		n, err := zr.Read(out[filled:])
		filled += n
		switch {
		case err == io.EOF:
			return out[:filled], nil
		case n == 0 && err != nil:
			return nil, err
		case n == 0:
			return out[:filled], nil
		}
		// Bytes were delivered (possibly alongside a non-EOF error): keep
		// reading; a persistent error surfaces on the next zero-byte read.
	}
}
// lzwCompress LZW-compresses in (MSB order, 8-bit literals) into the
// fixed-size buffer out and returns the slice of out that holds the result.
//
// The LZW writer may hold data back until Close, so the Close error must be
// checked: ignoring it would hand back a truncated stream with a nil error
// whenever out is too small. On any error the returned slice is nil.
func lzwCompress(in, out []byte) ([]byte, error) {
	b := NewBuffer(out)
	w := lzw.NewWriter(b, lzw.MSB, 8)
	_, err := w.Write(in)
	// Always close the writer, but report the earliest failure.
	if cerr := w.Close(); err == nil {
		err = cerr
	}
	if err != nil {
		return nil, err
	}
	return b.Bytes(), nil
}
// lzwUncompress decodes the LZW stream in (MSB order, 8-bit literals) into
// out and returns the filled prefix of out.
//
// Reading stops at io.EOF or as soon as a Read yields zero bytes (which is
// also what happens once out is full). A non-EOF read error seen when no
// more bytes arrive is returned with a nil slice.
func lzwUncompress(in, out []byte) ([]byte, error) {
	src := lzw.NewReader(NewBuffer(in), lzw.MSB, 8)
	defer src.Close()

	filled := 0
	for {
		n, err := src.Read(out[filled:])
		filled += n
		switch {
		case err == io.EOF:
			return out[:filled], nil
		case n == 0 && err != nil:
			return nil, err
		case n == 0:
			return out[:filled], nil
		}
		// Bytes were delivered (possibly alongside a non-EOF error): keep
		// reading; a persistent error surfaces on the next zero-byte read.
	}
}
// main parses the flags, gathers and loads the input files, and runs every
// requested codec over them, exiting via log.Fatal on the first error.
//
// NOTE(review): testCodecs is called with output=false, so no per-codec
// statistics are printed — confirm this is intended.
func main() {
	flag.Parse()

	// files is package-level because testCodecs uses it to label its output.
	files = getListOfFiles()

	inputs, largest, err := loadFromFiles(files)
	if err == nil {
		err = testCodecs(inputs, largest, false)
	}
	if err != nil {
		log.Fatal(err)
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment