Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
compare the bias of pearson and fnv hashes on bytes
package main
import (
"fmt"
"hash/fnv"
"testing"
"math/rand"
"time"
"github.com/montanaflynn/stats"
)
func TestHashes(t *testing.T) {
t.Run("fnv", testHash(fnvhash, 1e6, 32))
t.Run("pearson", testHash(func(msg []byte) []byte {
return pearson(msg, scramble([]byte("foobar")), 4)
}, 1e6, 32))
}
func testHash(fn func([]byte) []byte, iter int, bytecount int) func(t *testing.T) {
return func(t *testing.T) {
rand.Seed(42)
hist := make([]map[int]int, 4)
for i := 0; i < 4; i++ {
hist[i] = map[int]int{}
}
msg := make([]byte, bytecount)
func() {
defer func(s time.Time) { fmt.Println("took", time.Since(s)) }(time.Now())
for i := 0; i < iter; i++ {
rand.Read(msg)
res := fn(msg)
for j := 0; j < 4; j++ {
hist[j][int(res[j])]++
}
}
}()
for i := 0; i < 4; i++ {
h := stats.LoadRawData(hist[i])
m, _ := stats.Mean(h)
stddev, _ := stats.StandardDeviation(h)
fmt.Println(m, stddev, "=", stddev*100.0/m, "%")
}
}
}
func fnvhash(msg []byte) []byte {
h := fnv.New32()
h.Write(msg)
res := h.Sum32()
out := make([]byte, 4)
for i := 0; i < 4; i++ {
out[i] = byte(res >> uint(i*8) % 256)
}
return out
}
func pearson(msg, iv []byte, bytecount int) []byte {
var h byte
hh := make([]byte, bytecount)
for j := 0; j < bytecount; j++ {
h = iv[(int(msg[0])+j)%256]
for i := 1; i < len(msg); i++ {
h = iv[h^msg[i]]
}
hh[j] = h
}
return hh
}
func scramble(seed []byte) []byte {
ss := make([]byte, 256)
for i := 0; i < 256; i++ {
ss[i] = byte(i)
}
j := 0
for i := 0; i < 256; i++ {
j = (j + int(ss[i]) + int(seed[i%len(seed)])) % 256
ss[i], ss[j] = ss[j], ss[i]
}
return ss
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment