Skip to content

Instantly share code, notes, and snippets.

@klauspost
Last active November 21, 2019 12:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save klauspost/248df4f53a99e68c31c4c2137c27d8e8 to your computer and use it in GitHub Desktop.
Save klauspost/248df4f53a99e68c31c4c2137c27d8e8 to your computer and use it in GitHub Desktop.
package table
import (
"fmt"
"github.com/DataDog/zstd"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/y"
"github.com/golang/snappy"
"github.com/klauspost/compress/s2"
gozstd "github.com/klauspost/compress/zstd"
"github.com/pierrec/lz4/v3"
"github.com/stretchr/testify/require"
"io/ioutil"
"math/rand"
"os"
"testing"
"time"
)
// BenchmarkComp compares block codecs (Snappy, S2, LZ4 and two ZSTD
// implementations) on a single table block.
//
// The block comes from a table built by getTableForBenchmarks2. Every codec
// compresses the block once up front so that the "Decompression"
// sub-benchmarks have input; "Compression" and "Decompression" are then
// measured per codec with b.SetBytes set to the uncompressed block size, so
// the reported MB/s always refers to uncompressed bytes.
//
// Flip the showRatio / validate constants to print compression ratios or to
// verify that decompression round-trips on the first iteration.
func BenchmarkComp(b *testing.B) {
	block, err := getTableForBenchmarks2(b, 1000).block(1)
	require.NoError(b, err)
	data := block.data
	fmt.Println("data size", len(data))
	if false {
		// Flip to true to dump the raw block for offline experiments.
		if err := ioutil.WriteFile("comp-data.bin", data, os.ModePerm); err != nil {
			b.Fatal(err)
		}
	}

	// Pre-compress the block with each codec; these are the inputs of the
	// decompression benchmarks below.
	var s2Compressed, snappyCompressed, LZ4Compressed, ZSTDCompressed []byte
	{
		buf := make([]byte, len(data))
		snappyCompressed = snappy.Encode(buf, data)
	}
	{
		buf := make([]byte, len(data))
		s2Compressed = s2.Encode(buf, data)
	}
	{
		buf := make([]byte, len(data))
		lzht := make([]int, 64<<10) // buffer for the compression table
		n, err := lz4.CompressBlock(data, buf, lzht)
		if err != nil {
			panic(err)
		}
		if n == 0 {
			// With a destination smaller than the worst-case bound, lz4
			// reports incompressible input as (0, nil); the decompression
			// benchmark would then have nothing to decode.
			b.Fatal("lz4: block is incompressible, nothing to benchmark")
		}
		LZ4Compressed = buf[:n] // compressed data
	}
	{
		if false {
			d, err := zstd.Compress(nil, data)
			if err != nil {
				panic(err)
			}
			ZSTDCompressed = d
		} else {
			enc, err := gozstd.NewWriter(nil, gozstd.WithSingleSegment(true), gozstd.WithEncoderCRC(false), gozstd.WithEncoderLevel(gozstd.SpeedFastest))
			if err != nil {
				b.Fatal(err)
			}
			ZSTDCompressed = enc.EncodeAll(data, nil)
		}
	}

	const (
		showRatio = false
		validate  = false
	)

	b.ResetTimer()
	b.Run("Compression", func(b *testing.B) {
		b.Run("Snappy", func(b *testing.B) {
			buf := make([]byte, snappy.MaxEncodedLen(len(data)))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d := snappy.Encode(buf, data)
				if len(d) == 0 {
					panic(d)
				}
				if len(d) >= len(data) {
					panic(fmt.Sprint("length ", len(d), len(data)))
				}
				if showRatio && i == 0 {
					fmt.Println("Compression Ratio Snappy", float64(len(data))/float64(len(d)))
				}
			}
		})
		b.Run("S2", func(b *testing.B) {
			buf := make([]byte, s2.MaxEncodedLen(len(data)))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d := s2.Encode(buf, data)
				if len(d) == 0 {
					panic(d)
				}
				if len(d) >= len(data) {
					panic(fmt.Sprint("length ", len(d), len(data)))
				}
				if showRatio && i == 0 {
					fmt.Println("Compression Ratio S2", float64(len(data))/float64(len(d)))
				}
			}
		})
		b.Run("S2 Better", func(b *testing.B) {
			buf := make([]byte, s2.MaxEncodedLen(len(data)))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d := s2.EncodeBetter(buf, data)
				if len(d) == 0 {
					panic(d)
				}
				if len(d) >= len(data) {
					panic(fmt.Sprint("length ", len(d), len(data)))
				}
				if showRatio && i == 0 {
					// Label distinguishes this from the plain S2 run above.
					fmt.Println("Compression Ratio S2 Better", float64(len(data))/float64(len(d)))
				}
			}
		})
		b.Run("LZ4", func(b *testing.B) {
			lzbuf := make([]byte, len(data))
			lzht := make([]int, 64<<10) // buffer for the compression table
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				// lzbuf is deliberately never re-sliced: every iteration must
				// see the same full-size destination buffer.
				n, err := lz4.CompressBlock(data, lzbuf, lzht)
				if err != nil {
					panic(err)
				}
				if n == 0 || n >= len(data) {
					panic(fmt.Sprint("length ", n, len(data)))
				}
				if showRatio && i == 0 {
					fmt.Println("Compression Ratio LZ4", float64(len(data))/float64(n))
				}
			}
		})
		b.Run("ZSTD - Datadog", func(b *testing.B) {
			// Size the destination to the worst-case bound so the library can
			// reuse it instead of allocating a fresh buffer on every call.
			buf := make([]byte, zstd.CompressBound(len(data)))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := zstd.Compress(buf, data)
				if err != nil {
					panic(err)
				}
				if len(d) == 0 {
					panic(d)
				}
				if len(d) >= len(data) {
					panic(fmt.Sprint("length ", len(d), len(data)))
				}
				if showRatio && i == 0 {
					fmt.Println("Compression Ratio ZSTD", float64(len(data))/float64(len(d)))
				}
			}
		})
		b.Run("ZSTD - Go - Fastest", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			enc, err := gozstd.NewWriter(nil, gozstd.WithSingleSegment(true), gozstd.WithEncoderCRC(false), gozstd.WithEncoderLevel(gozstd.SpeedFastest))
			if err != nil {
				b.Fatal(err)
			}
			// Reset only after the encoder exists so its setup is not timed.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d := enc.EncodeAll(data, buf[:0])
				if len(d) == 0 {
					panic(d)
				}
				if len(d) >= len(data) {
					panic(fmt.Sprint("length ", len(d), len(data)))
				}
				if showRatio && i == 0 {
					fmt.Println("Compression Ratio Go ZSTD Fastest", float64(len(data))/float64(len(d)))
				}
			}
		})
		b.Run("ZSTD - Go - Default", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			enc, err := gozstd.NewWriter(nil, gozstd.WithSingleSegment(true), gozstd.WithEncoderCRC(false), gozstd.WithEncoderLevel(gozstd.SpeedDefault))
			if err != nil {
				b.Fatal(err)
			}
			// Reset only after the encoder exists so its setup is not timed.
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d := enc.EncodeAll(data, buf[:0])
				if len(d) == 0 {
					panic(d)
				}
				if len(d) >= len(data) {
					panic(fmt.Sprint("length ", len(d), len(data)))
				}
				if showRatio && i == 0 {
					fmt.Println("Compression Ratio Go ZSTD Default", float64(len(data))/float64(len(d)))
				}
			}
		})
	})

	b.Run("Decompression", func(b *testing.B) {
		b.Run("Snappy", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := snappy.Decode(buf, snappyCompressed)
				if err != nil {
					panic(err)
				}
				_ = d
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})
		b.Run("S2", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := s2.Decode(buf, s2Compressed)
				if err != nil {
					panic(err)
				}
				_ = d
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})
		b.Run("LZ4", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				n, err := lz4.UncompressBlock(LZ4Compressed, buf)
				if err != nil {
					// Fail like the other codecs instead of printing once per
					// iteration and timing a broken decode.
					panic(err)
				}
				d := buf[:n] // uncompressed data; buf itself keeps its full size
				_ = d
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})
		b.Run("ZSTD - Datadog", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := zstd.Decompress(buf, ZSTDCompressed)
				if err != nil {
					panic(err)
				}
				_ = d
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})
		b.Run("ZSTD - Go", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			dec, err := gozstd.NewReader(nil)
			if err != nil {
				panic(err)
			}
			// Release the decoder's resources once this sub-benchmark ends.
			defer dec.Close()
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := dec.DecodeAll(ZSTDCompressed, buf[:0])
				if err != nil {
					panic(err)
				}
				_ = d
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})
	})
}
// getTableForBenchmarks2 builds a table with count key/value pairs, writes it
// to a new "<random id>.sst" file under os.TempDir() and returns the opened
// table.
//
// The table is built with ZSTD compression, a 4 KiB block size and the
// LoadToRAM loading mode. Keys are 16-digit hex strings and values are small
// JSON-like documents; both come from a fixed-seed generator, so the table
// contents are identical on every run. Only the file name is time-dependent.
//
// Any failure aborts the benchmark through require. This function neither
// closes the table nor removes the file.
func getTableForBenchmarks2(b *testing.B, count int) *Table {
	// Seed with nanosecond resolution: with whole seconds, two calls made
	// within the same second reseed identically and pick the same file name.
	rand.Seed(time.Now().UnixNano())
	opts := Options{LoadingMode: options.LoadToRAM, Compression: options.ZSTD, BlockSize: 4 * 1024, BloomFalsePositive: 0.01}
	builder := NewTableBuilder(opts)
	filename := fmt.Sprintf("%s%s%d.sst", os.TempDir(), string(os.PathSeparator), rand.Int63())
	f, err := y.OpenSyncedFile(filename, true)
	require.NoError(b, err)

	// Use deterministic source
	rng := rand.New(rand.NewSource(0xabad1dea))
	for i := 0; i < count; i++ {
		k := fmt.Sprintf("%016x", rng.Uint64())
		v := fmt.Sprintf(`{"value":"%d","another":"%016x","key-%x":%t}`, i, rng.Uint32(), rng.Uint32(), rng.Uint32()&1 == 0)
		// Alternative, highly regular data set:
		//k := fmt.Sprintf("%016x", i)
		//v := fmt.Sprintf(`%d`, i)
		builder.Add([]byte(k), y.ValueStruct{Value: []byte(v)})
	}

	_, err = f.Write(builder.Finish())
	require.NoError(b, err, "unable to write to file")
	tbl, err := OpenTable(f, opts)
	require.NoError(b, err, "unable to open table")
	return tbl
}
// go.mod contents for the github.com/dgraph-io/badger/v2 module.
module github.com/dgraph-io/badger/v2
go 1.12
// The compression packages imported by the benchmark are DataDog/zstd,
// golang/snappy, klauspost/compress (s2 and zstd) and pierrec/lz4/v3.
require (
github.com/DataDog/zstd v1.4.1
github.com/cespare/xxhash v1.1.0
github.com/dgraph-io/ristretto v0.0.0-20191025175511-c1f00be0418e
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2
github.com/dustin/go-humanize v1.0.0
github.com/golang/protobuf v1.3.1
github.com/golang/snappy v0.0.1
github.com/klauspost/compress v1.9.2
github.com/pierrec/lz4/v3 v3.1.0
github.com/pkg/errors v0.8.1
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/spf13/cobra v0.0.5
github.com/stretchr/testify v1.4.0
golang.org/x/net v0.0.0-20190620200207-3b0461eec859
golang.org/x/sys v0.0.0-20190626221950-04f50cda93cb
gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment