Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
package bloomcounter
import (
"encoding/binary"
"bytes"
"crypto/md5"
)
type BloomCounter struct {
buckets []uint
nhashes int
limit int
}
// constructor
func New(buckets int, limit int) *BloomCounter {
bf := new(BloomCounter)
bf.buckets = make([]uint, buckets)
bf.nhashes = 8
bf.limit = limit
return bf
}
// add a new item
// returns a boolean, if it's true the word has hit it's limit and should be counted.
func (bf *BloomCounter) Add(item string) bool {
hashes := get_hashes(item, bf.nhashes, len(bf.buckets))
hitLimit := true
for _, hash := range hashes {
if bf.buckets[hash] < uint(bf.limit) {
bf.buckets[hash]++
hitLimit = false
}
}
return hitLimit
}
// return a uint32 array of k hashes between 0 and m
func get_hashes(item string, k int, m int) []uint32 {
hashes := make([]uint32, k)
for i := range hashes {
a, b := hash(item)
hashes[i] = (a + b * uint32(i)) % uint32(m)
}
return hashes;
}
// returns two uint32s. one hash two values, sneaky.
func hash(item string) (uint32, uint32) {
h := md5.New()
h.Write([]byte(item))
buf := bytes.NewBuffer(h.Sum(nil))
var hash1 uint32
var hash2 uint32
binary.Read(buf, binary.LittleEndian, &hash1)
binary.Read(buf, binary.LittleEndian, &hash2)
return hash1, hash2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment