Skip to content

Instantly share code, notes, and snippets.

@hhatto
Created September 13, 2017 05:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hhatto/c65d75c572c895355aad62964f9fa86e to your computer and use it in GitHub Desktop.
Save hhatto/c65d75c572c895355aad62964f9fa86e to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"log"
"github.com/clarkduvall/hyperloglog"
"github.com/dchest/siphash"
metro "github.com/dgryski/go-metro"
gohll "github.com/sasha-s/go-hll"
"github.com/seiflotfy/hyperbitbit"
"github.com/spaolacci/murmur3"
)
// nInit is the base number of distinct keys; main runs the comparison at
// nInit, nInit*10, nInit*100 and nInit*1000.
const nInit int = 1000
// hbb inserts n distinct six-digit, zero-padded decimal keys into a
// hyperbitbit sketch and prints the estimated cardinality followed by the
// signed error relative to n, in percent.
func hbb(n int) {
	sketch := hyperbitbit.New()
	for i := 0; i < n; i++ {
		key := fmt.Sprintf("%06d", i)
		sketch.Add([]byte(key))
	}

	estimate := sketch.Cardinality()
	// Negative values mean the sketch under-estimated the true count.
	errPct := float64(int(estimate)-n) / float64(n) * 100.
	fmt.Printf(" %-45s: %8d (%3.3f%%)\n", "Hyperbitbit", estimate, errPct)
}
// hllWithMetroHash inserts n distinct six-digit, zero-padded decimal keys into
// a go-hll sketch sized by gohll.SizeByP(precision), hashing each key with
// metro.Hash64 (second argument fixed at 42). It prints the estimated
// cardinality and the signed error relative to n, in percent.
//
// It panics through log.Panicln when SizeByP rejects the precision.
func hllWithMetroHash(n, precision int) {
	size, err := gohll.SizeByP(precision)
	if err != nil {
		log.Panicln(err)
	}

	sketch := make(gohll.HLL, size)
	for i := 0; i < n; i++ {
		key := []byte(fmt.Sprintf("%06d", i))
		sketch.Add(metro.Hash64(key, 42))
	}

	estimate := sketch.EstimateCardinality()
	// Negative values mean the sketch under-estimated the true count.
	errPct := float64(int(estimate)-n) / float64(n) * 100.
	fmt.Printf(" %-45s: %8d (%3.3f%%)\n",
		"go-hll's Hyperloglog (metro)", estimate, errPct)
}
// hll inserts n distinct six-digit, zero-padded decimal keys into a go-hll
// sketch sized by gohll.SizeByP(precision), hashing each key with
// siphash.Hash using the fixed leading arguments 2 and 57. It prints the
// estimated cardinality and the signed error relative to n, in percent.
//
// It panics through log.Panicln when SizeByP rejects the precision.
func hll(n, precision int) {
	size, err := gohll.SizeByP(precision)
	if err != nil {
		log.Panicln(err)
	}

	sketch := make(gohll.HLL, size)
	for i := 0; i < n; i++ {
		key := []byte(fmt.Sprintf("%06d", i))
		// Disabled alternative: sketch.Add(murmur3.Hash128(2, 57, key))
		sketch.Add(siphash.Hash(2, 57, key))
	}

	estimate := sketch.EstimateCardinality()
	// Negative values mean the sketch under-estimated the true count.
	errPct := float64(int(estimate)-n) / float64(n) * 100.
	fmt.Printf(" %-45s: %8d (%3.3f%%)\n",
		"go-hll's Hyperloglog (siphash)", estimate, errPct)
}
// hloglog inserts n distinct six-digit, zero-padded decimal keys into a sketch
// created by hyperloglog.New(uint8(precision)), feeding each key through a
// murmur3.New32 hasher. It prints the estimated cardinality and the signed
// error relative to n, in percent.
//
// It panics through log.Panicln when hyperloglog.New returns an error.
func hloglog(n, precision int) {
	sketch, err := hyperloglog.New(uint8(precision))
	if err != nil {
		log.Panicln(err)
	}

	for i := 0; i < n; i++ {
		// A fresh hasher per key: Add is handed the hasher itself, not a digest.
		hasher := murmur3.New32()
		key := fmt.Sprintf("%06d", i)
		hasher.Write([]byte(key))
		sketch.Add(hasher)
	}

	estimate := sketch.Count()
	// Negative values mean the sketch under-estimated the true count.
	errPct := float64(int(estimate)-n) / float64(n) * 100.
	fmt.Printf(" %-45s: %8d (%3.3f%%)\n",
		"hyperloglog's Hyperloglog (murmur3)", estimate, errPct)
}
// hloglogpp inserts n distinct six-digit, zero-padded decimal keys into a
// sketch created by hyperloglog.NewPlus(uint8(precision)), feeding each key
// through a murmur3.New64 hasher. It prints the estimated cardinality and the
// signed error relative to n, in percent.
//
// It panics through log.Panicln when hyperloglog.NewPlus returns an error.
func hloglogpp(n, precision int) {
	sketch, err := hyperloglog.NewPlus(uint8(precision))
	if err != nil {
		log.Panicln(err)
	}

	for i := 0; i < n; i++ {
		// A fresh hasher per key: Add is handed the hasher itself, not a digest.
		hasher := murmur3.New64()
		key := fmt.Sprintf("%06d", i)
		hasher.Write([]byte(key))
		sketch.Add(hasher)
	}

	estimate := sketch.Count()
	// Negative values mean the sketch under-estimated the true count.
	errPct := float64(int(estimate)-n) / float64(n) * 100.
	fmt.Printf(" %-45s: %8d (%3.3f%%)\n",
		"hyperloglog's Hyperloglog++ (murmur3)", estimate, errPct)
}
// main runs every estimator at each combination of key count and precision.
// Key counts are nInit scaled by 1, 10, 100 and 1000; precisions are 4, 8 and
// 16. A separator line is printed before each key count, and a precision
// header plus the true key count before each group of estimator results.
func main() {
	precisions := []int{4, 8, 16}

	for scale := 1; scale <= 1000; scale *= 10 {
		n := nInit * scale
		fmt.Println("==========")
		for _, p := range precisions {
			fmt.Printf("precision: %d\n", p)
			fmt.Printf(" %-45s: %8d\n", "Uniq Data Dum", n)
			// hbb takes no precision argument, but it is still run once per
			// precision.
			hbb(n)
			hll(n, p)
			hllWithMetroHash(n, p)
			hloglog(n, p)
			hloglogpp(n, p)
		}
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment