@achille-roussel
Created September 28, 2017 16:48
Benchmark comparing the performance of gzip and snappy when compressing and decompressing slices of KSUIDs.
package main

import (
	"bytes"
	"compress/gzip"
	"reflect"
	"testing"
	"time"
	"unsafe"

	"github.com/golang/snappy"
	"github.com/segmentio/ksuid"
)
func TestCompress(t *testing.T) {
	tests := []struct {
		scenario string
		function func(*testing.T, func([]ksuid.KSUID) []byte, func([]byte) []ksuid.KSUID)
	}{
		{
			scenario: "sparse",
			function: testCompressSparse,
		},
		{
			scenario: "packed",
			function: testCompressPacked,
		},
		{
			scenario: "mixed",
			function: testCompressMixed,
		},
	}

	for _, test := range tests {
		t.Run(test.scenario, func(t *testing.T) {
			t.Run("gzip", func(t *testing.T) {
				test.function(t, compressGzip, decompressGzip)
			})
			t.Run("snappy", func(t *testing.T) {
				test.function(t, compressSnappy, decompressSnappy)
			})
		})
	}
}
// testCompressSparse compresses KSUIDs generated at random across a range of
// timestamps, so values share little more than an occasional timestamp prefix.
func testCompressSparse(t *testing.T, compress func([]ksuid.KSUID) []byte, decompress func([]byte) []ksuid.KSUID) {
	now := time.Now()

	times := [100]time.Time{}
	for i := range times {
		times[i] = now.Add(time.Duration(i) * 2 * time.Second)
	}

	ksuids := [1000]ksuid.KSUID{}
	for i := range ksuids {
		ksuids[i], _ = ksuid.NewRandomWithTime(times[i%len(times)])
	}

	set := compress(ksuids[:])

	for i, id := range decompress(set) {
		if i >= len(ksuids) {
			t.Error("too many KSUIDs were produced by the set iterator")
			break
		}
		if ksuids[i] != id {
			t.Errorf("bad KSUID at index %d: expected %s but found %s", i, ksuids[i], id)
		}
	}

	reportCompressionRatio(t, ksuids[:], set)
}
// testCompressPacked compresses KSUIDs drawn from only 10 sequences, so the
// slice contains a small number of distinct prefixes repeated throughout.
func testCompressPacked(t *testing.T, compress func([]ksuid.KSUID) []byte, decompress func([]byte) []ksuid.KSUID) {
	sequences := [10]ksuid.Sequence{}
	for i := range sequences {
		sequences[i] = ksuid.Sequence{Seed: ksuid.New()}
	}

	ksuids := [1000]ksuid.KSUID{}
	for i := range ksuids {
		ksuids[i], _ = sequences[i%len(sequences)].Next()
	}

	set := compress(ksuids[:])

	for i, id := range decompress(set) {
		if i >= len(ksuids) {
			t.Error("too many KSUIDs were produced by the set iterator")
			break
		}
		if ksuids[i] != id {
			t.Errorf("bad KSUID at index %d: expected %s but found %s", i, ksuids[i], id)
		}
	}

	reportCompressionRatio(t, ksuids[:], set)
}
// testCompressMixed combines both patterns: 200 sequences seeded across 20
// timestamps, giving shared prefixes with much more variety than the packed
// scenario.
func testCompressMixed(t *testing.T, compress func([]ksuid.KSUID) []byte, decompress func([]byte) []ksuid.KSUID) {
	now := time.Now()

	times := [20]time.Time{}
	for i := range times {
		times[i] = now.Add(time.Duration(i) * 2 * time.Second)
	}

	sequences := [200]ksuid.Sequence{}
	for i := range sequences {
		seed, _ := ksuid.NewRandomWithTime(times[i%len(times)])
		sequences[i] = ksuid.Sequence{Seed: seed}
	}

	ksuids := [1000]ksuid.KSUID{}
	for i := range ksuids {
		ksuids[i], _ = sequences[i%len(sequences)].Next()
	}

	set := compress(ksuids[:])

	for i, id := range decompress(set) {
		if i >= len(ksuids) {
			t.Error("too many KSUIDs were produced by the set iterator")
			break
		}
		if ksuids[i] != id {
			t.Errorf("bad KSUID at index %d: expected %s but found %s", i, ksuids[i], id)
		}
	}

	reportCompressionRatio(t, ksuids[:], set)
}
// reportCompressionRatio logs the raw size of the KSUID slice, the compressed
// size, and the space saved as a percentage.
func reportCompressionRatio(t *testing.T, ksuids []ksuid.KSUID, set []byte) {
	len1 := int(unsafe.Sizeof(ksuid.KSUID{})) * len(ksuids)
	len2 := len(set)
	t.Logf("original %d B, compressed %d B (%.4g%%)", len1, len2, 100*(1-(float64(len2)/float64(len1))))
}
func BenchmarkCompress(b *testing.B) {
	b.Run("gzip", func(b *testing.B) { benchmark(b, compressGzip, decompressGzip) })
	b.Run("snappy", func(b *testing.B) { benchmark(b, compressSnappy, decompressSnappy) })
}
// benchmark measures compression ("write") and decompression ("read") of a
// slice of 1000 fully random KSUIDs.
func benchmark(b *testing.B, compress func([]ksuid.KSUID) []byte, decompress func([]byte) []ksuid.KSUID) {
	ksuids := [1000]ksuid.KSUID{}
	for i := range ksuids {
		ksuids[i] = ksuid.New()
	}

	set := compress(ksuids[:])

	b.Run("write", func(b *testing.B) {
		for i := 0; i != b.N; i++ {
			compress(ksuids[:])
		}
	})

	b.Run("read", func(b *testing.B) {
		for i := 0; i != b.N; i++ {
			for range decompress(set) {
			}
		}
	})
}
func compressGzip(ksuids []ksuid.KSUID) []byte {
	b := &bytes.Buffer{}
	w, _ := gzip.NewWriterLevel(b, gzip.BestCompression)
	w.Write(ksuidsToBytes(ksuids))
	w.Close()
	return b.Bytes()
}

func compressSnappy(ksuids []ksuid.KSUID) []byte {
	return snappy.Encode(nil, ksuidsToBytes(ksuids))
}

func decompressGzip(b []byte) []ksuid.KSUID {
	a := &bytes.Buffer{}
	r, _ := gzip.NewReader(bytes.NewReader(b))
	a.ReadFrom(r)
	return bytesToKsuids(a.Bytes())
}

func decompressSnappy(b []byte) []ksuid.KSUID {
	b, _ = snappy.Decode(nil, b)
	return bytesToKsuids(b)
}
// ksuidsToBytes reinterprets a []ksuid.KSUID as the []byte backing it, without
// copying, by rebuilding the slice header with the element size scaled in.
func ksuidsToBytes(ksuids []ksuid.KSUID) []byte {
	return *(*[]byte)(unsafe.Pointer(&reflect.SliceHeader{
		Data: uintptr(unsafe.Pointer(&ksuids[0])),
		Len:  int(unsafe.Sizeof(ksuids[0])) * len(ksuids),
		Cap:  int(unsafe.Sizeof(ksuids[0])) * cap(ksuids),
	}))
}

// bytesToKsuids is the inverse conversion, viewing a []byte as a []ksuid.KSUID
// without copying.
func bytesToKsuids(b []byte) []ksuid.KSUID {
	return *(*[]ksuid.KSUID)(unsafe.Pointer(&reflect.SliceHeader{
		Data: uintptr(unsafe.Pointer(&b[0])),
		Len:  len(b) / int(unsafe.Sizeof(ksuid.KSUID{})),
		Cap:  cap(b) / int(unsafe.Sizeof(ksuid.KSUID{})),
	}))
}
achille-roussel commented Sep 28, 2017

$ go test -v -bench . -benchmem
=== RUN   TestCompress
=== RUN   TestCompress/sparse
=== RUN   TestCompress/sparse/gzip
=== RUN   TestCompress/sparse/snappy
=== RUN   TestCompress/packed
=== RUN   TestCompress/packed/gzip
=== RUN   TestCompress/packed/snappy
=== RUN   TestCompress/mixed
=== RUN   TestCompress/mixed/gzip
=== RUN   TestCompress/mixed/snappy
--- PASS: TestCompress (0.01s)
    --- PASS: TestCompress/sparse (0.00s)
        --- PASS: TestCompress/sparse/gzip (0.00s)
        	test_test.go:140: original 20000 B, compressed 18415 B (7.925%)
        --- PASS: TestCompress/sparse/snappy (0.00s)
        	test_test.go:140: original 20000 B, compressed 20009 B (-0.045%)
    --- PASS: TestCompress/packed (0.00s)
        --- PASS: TestCompress/packed/gzip (0.00s)
        	test_test.go:140: original 20000 B, compressed 2502 B (87.49%)
        --- PASS: TestCompress/packed/snappy (0.00s)
        	test_test.go:140: original 20000 B, compressed 5129 B (74.35%)
    --- PASS: TestCompress/mixed (0.00s)
        --- PASS: TestCompress/mixed/gzip (0.00s)
        	test_test.go:140: original 20000 B, compressed 5530 B (72.35%)
        --- PASS: TestCompress/mixed/snappy (0.00s)
        	test_test.go:140: original 20000 B, compressed 7651 B (61.75%)
goos: darwin
goarch: amd64
BenchmarkCompress/gzip/write-4   	     500	   3039856 ns/op	   6.10 MB/s	  852768 B/op	      24 allocs/op
BenchmarkCompress/gzip/read-4    	    5000	    328330 ns/op	  56.45 MB/s	  104760 B/op	      14 allocs/op
BenchmarkCompress/snappy/write-4 	   30000	     46682 ns/op	 422.08 MB/s	   24576 B/op	       1 allocs/op
BenchmarkCompress/snappy/read-4  	  200000	      8733 ns/op	2256.15 MB/s	   20480 B/op	       1 allocs/op
PASS
ok  	_/tmp	7.337s
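The gap between the packed and sparse scenarios follows from the KSUID layout: IDs produced by a single ksuid.Sequence share their timestamp and seed payload and differ only in a trailing sequence counter, while fully random IDs share almost nothing for gzip or snappy to exploit. A standalone sketch (not part of the benchmark) makes the shared prefix visible:

package main

import (
	"fmt"

	"github.com/segmentio/ksuid"
)

func main() {
	// Two consecutive IDs from the same sequence: their hex dumps should differ
	// only near the end, which is why the "packed" scenario compresses so well.
	seq := ksuid.Sequence{Seed: ksuid.New()}
	a, _ := seq.Next()
	b, _ := seq.Next()
	fmt.Printf("%x\n%x\n", a[:], b[:])
}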
