advincze/compare_pearson_fnv_hash.go

## compare_pearson_fnv_hash.go
package main

import (
	"fmt"
	"hash/fnv"
	"testing"

	"math/rand"

	"time"

	"github.com/montanaflynn/stats"
)

func TestHashes(t *testing.T) {
	t.Run("fnv", testHash(fnvhash, 1e6, 32))
	t.Run("pearson", testHash(func(msg []byte) []byte {
		return pearson(msg, scramble([]byte("foobar")), 4)
	}, 1e6, 32))
}

func testHash(fn func([]byte) []byte, iter int, bytecount int) func(t *testing.T) {
	return func(t *testing.T) {
		rand.Seed(42)
		hist := make([]map[int]int, 4)
		for i := 0; i < 4; i++ {
			hist[i] = map[int]int{}
		}
		msg := make([]byte, bytecount)
		func() {
			defer func(s time.Time) { fmt.Println("took", time.Since(s)) }(time.Now())
			for i := 0; i < iter; i++ {
				rand.Read(msg)
				res := fn(msg)
				for j := 0; j < 4; j++ {
					hist[j][int(res[j])]++
				}
			}
		}()
		for i := 0; i < 4; i++ {
			h := stats.LoadRawData(hist[i])
			m, _ := stats.Mean(h)
			stddev, _ := stats.StandardDeviation(h)
			fmt.Println(m, stddev, "=", stddev*100.0/m, "%")
		}
	}
}

func fnvhash(msg []byte) []byte {
	h := fnv.New32()
	h.Write(msg)
	res := h.Sum32()
	out := make([]byte, 4)
	for i := 0; i < 4; i++ {
		out[i] = byte(res >> uint(i*8) % 256)
	}
	return out
}

func pearson(msg, iv []byte, bytecount int) []byte {
	var h byte
	hh := make([]byte, bytecount)
	for j := 0; j < bytecount; j++ {
		h = iv[(int(msg[0])+j)%256]
		for i := 1; i < len(msg); i++ {
			h = iv[h^msg[i]]
		}
		hh[j] = h
	}
	return hh
}

func scramble(seed []byte) []byte {
	ss := make([]byte, 256)
	for i := 0; i < 256; i++ {
		ss[i] = byte(i)
	}
	j := 0
	for i := 0; i < 256; i++ {
		j = (j + int(ss[i]) + int(seed[i%len(seed)])) % 256
		ss[i], ss[j] = ss[j], ss[i]
	}
	return ss
}
	package main

	import (
	"fmt"
	"hash/fnv"
	"testing"

	"math/rand"

	"time"

	"github.com/montanaflynn/stats"
	)

	func TestHashes(t *testing.T) {
	t.Run("fnv", testHash(fnvhash, 1e6, 32))
	t.Run("pearson", testHash(func(msg []byte) []byte {
	return pearson(msg, scramble([]byte("foobar")), 4)
	}, 1e6, 32))
	}

	func testHash(fn func([]byte) []byte, iter int, bytecount int) func(t *testing.T) {
	return func(t *testing.T) {
	rand.Seed(42)
	hist := make([]map[int]int, 4)
	for i := 0; i < 4; i++ {
	hist[i] = map[int]int{}
	}
	msg := make([]byte, bytecount)
	func() {
	defer func(s time.Time) { fmt.Println("took", time.Since(s)) }(time.Now())
	for i := 0; i < iter; i++ {
	rand.Read(msg)
	res := fn(msg)
	for j := 0; j < 4; j++ {
	hist[j][int(res[j])]++
	}
	}
	}()
	for i := 0; i < 4; i++ {
	h := stats.LoadRawData(hist[i])
	m, _ := stats.Mean(h)
	stddev, _ := stats.StandardDeviation(h)
	fmt.Println(m, stddev, "=", stddev*100.0/m, "%")
	}
	}
	}

	func fnvhash(msg []byte) []byte {
	h := fnv.New32()
	h.Write(msg)
	res := h.Sum32()
	out := make([]byte, 4)
	for i := 0; i < 4; i++ {
	out[i] = byte(res >> uint(i*8) % 256)
	}
	return out
	}

	func pearson(msg, iv []byte, bytecount int) []byte {
	var h byte
	hh := make([]byte, bytecount)
	for j := 0; j < bytecount; j++ {
	h = iv[(int(msg[0])+j)%256]
	for i := 1; i < len(msg); i++ {
	h = iv[h^msg[i]]
	}
	hh[j] = h
	}
	return hh
	}

	func scramble(seed []byte) []byte {
	ss := make([]byte, 256)
	for i := 0; i < 256; i++ {
	ss[i] = byte(i)
	}
	j := 0
	for i := 0; i < 256; i++ {
	j = (j + int(ss[i]) + int(seed[i%len(seed)])) % 256
	ss[i], ss[j] = ss[j], ss[i]
	}
	return ss
	}