Skip to content

Instantly share code, notes, and snippets.

@jarifibrahim
Created December 26, 2019 10:37
Show Gist options
  • Save jarifibrahim/91920e93d1ecac3006b269e0c05d6a24 to your computer and use it in GitHub Desktop.
Save jarifibrahim/91920e93d1ecac3006b269e0c05d6a24 to your computer and use it in GitHub Desktop.
package table
import (
"fmt"
"io/ioutil"
"math/rand"
"os"
"testing"
"time"
"github.com/DataDog/zstd"
"github.com/dgraph-io/badger/v2/options"
"github.com/dgraph-io/badger/v2/y"
"github.com/golang/snappy"
gozstd "github.com/klauspost/compress/zstd"
"github.com/pierrec/lz4/v3"
"github.com/stretchr/testify/require"
)
// Manual debugging switches for BenchmarkComp. Both are off by default so the
// timed loops contain as little extra work as possible.
const (
// showRatio, when true, makes each compression sub-benchmark print
// len(input)/len(compressed) on its first iteration.
showRatio = false
// validate, when true, makes each decompression sub-benchmark compare its
// output against the original input on its first iteration.
validate = false
)
// BenchmarkComp measures one-shot compression and decompression throughput of
// Snappy, LZ4 and two ZSTD implementations (the DataDog binding and the
// klauspost pure-Go package) over the contents of ./moby.txt, which must exist
// in the working directory.
//
// Every sub-benchmark reports throughput relative to the uncompressed input
// size (b.SetBytes) and keeps all setup (buffers, encoders, decoders) outside
// the timed region. Destination buffers are allocated once per sub-benchmark
// and never resliced inside the loop, so each iteration sees the same
// capacity.
func BenchmarkComp(b *testing.B) {
	// Hand-flipped debugging toggles.
	const (
		dumpInput      = false // write the raw input to comp-data.bin
		datadogFixture = false // build the ZSTD fixture with the DataDog binding
	)

	//block, err := getTableForBenchmarks2(b, 1000).block(1)
	//require.NoError(b, err)
	//data := block.data
	data, err := ioutil.ReadFile("./moby.txt")
	require.NoError(b, err) // check before touching data
	fmt.Println("data size", len(data))
	if dumpInput {
		require.NoError(b, ioutil.WriteFile("comp-data.bin", data, os.ModePerm))
	}

	// Pre-compressed fixtures consumed by the decompression sub-benchmarks.
	snappyCompressed := snappy.Encode(nil, data)

	lz4Compressed := make([]byte, len(data))
	n, err := lz4.CompressBlock(data, lz4Compressed, make([]int, 64<<10))
	require.NoError(b, err)
	if n == 0 {
		// CompressBlock reports incompressible input as (0, nil); an empty
		// fixture would make the LZ4 decompression numbers meaningless.
		b.Fatal("LZ4 could not compress the input")
	}
	lz4Compressed = lz4Compressed[:n]

	var zstdCompressed []byte
	if datadogFixture {
		zstdCompressed, err = zstd.CompressLevel(nil, data, 1)
		require.NoError(b, err)
	} else {
		enc, err := gozstd.NewWriter(nil, gozstd.WithEncoderCRC(false), gozstd.WithEncoderLevel(gozstd.SpeedFastest))
		require.NoError(b, err)
		zstdCompressed = enc.EncodeAll(data, nil)
	}

	b.ResetTimer()
	b.Run("Compression", func(b *testing.B) {
		b.Run("Snappy", func(b *testing.B) {
			buf := make([]byte, snappy.MaxEncodedLen(len(data)))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d := snappy.Encode(buf, data)
				verifyCompressed(b, "Compression Ratio Snappy", i, len(d), len(data))
			}
		})

		b.Run("LZ4", func(b *testing.B) {
			buf := make([]byte, len(data))
			ht := make([]int, 64<<10) // scratch space for the compression hash table
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				// buf is deliberately not resliced to the compressed length:
				// doing so would hand a shrunken destination to the next
				// iteration.
				n, err := lz4.CompressBlock(data, buf, ht)
				if err != nil {
					b.Fatal(err)
				}
				verifyCompressed(b, "Compression Ratio LZ4", i, n, len(data))
			}
		})

		datadogCases := []struct {
			name  string
			level int
		}{
			{"ZSTD - Datadog-level1", 1},
			{"ZSTD - Datadog-level3", 3},
		}
		for _, tc := range datadogCases {
			tc := tc
			b.Run(tc.name, func(b *testing.B) {
				// A destination smaller than CompressBound is discarded by the
				// binding, which would then allocate on every iteration.
				buf := make([]byte, zstd.CompressBound(len(data)))
				b.SetBytes(int64(len(data)))
				b.ResetTimer()
				for i := 0; i < b.N; i++ {
					d, err := zstd.CompressLevel(buf, data, tc.level)
					if err != nil {
						b.Fatal(err)
					}
					verifyCompressed(b, "Compression Ratio ZSTD", i, len(d), len(data))
				}
			})
		}

		goCases := []struct {
			name string
			opts []gozstd.EOption
		}{
			{"ZSTD - Go - level1", []gozstd.EOption{
				gozstd.WithEncoderCRC(false),
				gozstd.WithEncoderLevel(gozstd.SpeedFastest),
			}},
			{"ZSTD - Go - Default", []gozstd.EOption{
				gozstd.WithSingleSegment(true),
				gozstd.WithEncoderCRC(false),
				gozstd.WithEncoderLevel(gozstd.SpeedDefault),
			}},
		}
		for _, tc := range goCases {
			tc := tc
			b.Run(tc.name, func(b *testing.B) {
				buf := make([]byte, len(data))
				enc, err := gozstd.NewWriter(nil, tc.opts...)
				if err != nil {
					b.Fatal(err)
				}
				b.SetBytes(int64(len(data)))
				b.ResetTimer() // after encoder construction, so setup is not timed
				for i := 0; i < b.N; i++ {
					d := enc.EncodeAll(data, buf[:0])
					verifyCompressed(b, "Compression Ratio Go ZSTD", i, len(d), len(data))
				}
			})
		}
	})

	b.Run("Decompression", func(b *testing.B) {
		b.Run("Snappy", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := snappy.Decode(buf, snappyCompressed)
				if err != nil {
					b.Fatal(err)
				}
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})

		b.Run("LZ4", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				n, err := lz4.UncompressBlock(lz4Compressed, buf)
				if err != nil {
					// A failed decode must not be reported as a valid timing.
					b.Fatal(err)
				}
				if validate && i == 0 {
					require.Equal(b, data, buf[:n])
				}
			}
		})

		b.Run("ZSTD - Datadog", func(b *testing.B) {
			buf := make([]byte, len(data))
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := zstd.Decompress(buf, zstdCompressed)
				if err != nil {
					b.Fatal(err)
				}
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})

		b.Run("ZSTD - Go", func(b *testing.B) {
			buf := make([]byte, len(data))
			dec, err := gozstd.NewReader(nil)
			if err != nil {
				b.Fatal(err)
			}
			defer dec.Close() // release the decoder's resources
			b.SetBytes(int64(len(data)))
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				d, err := dec.DecodeAll(zstdCompressed, buf[:0])
				if err != nil {
					b.Fatal(err)
				}
				if validate && i == 0 {
					require.Equal(b, data, d)
				}
			}
		})
	})
}

// verifyCompressed aborts the benchmark unless got, the compressed size in
// bytes, is non-zero and strictly smaller than orig, the input size. When
// showRatio is set it prints the compression ratio under label on iteration
// i == 0.
func verifyCompressed(b *testing.B, label string, i, got, orig int) {
	b.Helper()
	if got == 0 {
		b.Fatal("compressor produced no output")
	}
	if got >= orig {
		b.Fatalf("length %d %d", got, orig)
	}
	if showRatio && i == 0 {
		fmt.Println(label, float64(orig)/float64(got))
	}
}
// getTableForBenchmarks2 builds a table of count generated key/value pairs
// using ZSTD compression and 4 KiB blocks, writes the finished table to a
// randomly named .sst file in the OS temp directory, and returns the table
// opened from that file. Any error fails b immediately via require.
func getTableForBenchmarks2(b *testing.B, count int) *Table {
	// The clock-seeded global source is used only for the file name below.
	rand.Seed(time.Now().Unix())

	opts := Options{
		LoadingMode:        options.LoadToRAM,
		Compression:        options.ZSTD,
		BlockSize:          4 * 1024,
		BloomFalsePositive: 0.01,
	}
	builder := NewTableBuilder(opts)

	tmpDir, sep := os.TempDir(), string(os.PathSeparator)
	path := fmt.Sprintf("%s%s%d.sst", tmpDir, sep, rand.Int63())
	file, err := y.OpenSyncedFile(path, true)
	require.NoError(b, err)

	// Table contents come from a fixed-seed source so every run produces the
	// same keys and values.
	rng := rand.New(rand.NewSource(0xabad1dea))
	for i := 0; i < count; i++ {
		// Draw order matters for reproducibility: key first, then the three
		// value fields from left to right.
		key := fmt.Sprintf("%016x", rng.Uint64())
		another := rng.Uint32()
		suffix := rng.Uint32()
		flag := rng.Uint32()&1 == 0
		val := fmt.Sprintf(`{"value":"%d","another":"%016x","key-%x":%t}`, i, another, suffix, flag)
		// Sequential alternative kept for reference:
		//key := fmt.Sprintf("%016x", i)
		//val := fmt.Sprintf(`%d`, i)
		builder.Add([]byte(key), y.ValueStruct{Value: []byte(val)}, 0)
	}

	payload := builder.Finish()
	_, err = file.Write(payload)
	require.NoError(b, err, "unable to write to file")

	table, err := OpenTable(file, opts)
	require.NoError(b, err, "unable to open table")
	return table
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment