Skip to content

Instantly share code, notes, and snippets.

@Alligator
Created March 3, 2014 19:03
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save Alligator/9332216 to your computer and use it in GitHub Desktop.
package bloomfilter
import (
"crypto/md5"
"encoding/binary"
"bytes"
)
type BloomFilter struct {
buckets []bool
nhashes int
}
// this is a constructor type thing i guess
func New(buckets int) *BloomFilter {
bf := new(BloomFilter)
bf.buckets = make([]bool, buckets)
bf.nhashes = 8
return bf
}
// add a new item
func (bf *BloomFilter) Add(item string) {
hashes := get_hashes(item, bf.nhashes, len(bf.buckets))
for _, hash := range hashes {
bf.buckets[hash] = true
}
}
// check if we've seem it before
func (bf *BloomFilter) Has(item string) bool {
hashes := get_hashes(item, bf.nhashes, len(bf.buckets))
for _, hash := range hashes {
if bf.buckets[hash] == false {
return false;
}
}
return true;
}
// return a uint32 array of k hashes between 0 and m
func get_hashes(item string, k int, m int) []uint32 {
hashes := make([]uint32, k)
for i := range hashes {
a, b := hash(item)
hashes[i] = (a + b * uint32(i)) % uint32(m)
}
return hashes;
}
// returns two uint32s. one hash two values, sneaky.
func hash(item string) (uint32, uint32) {
h := md5.New()
h.Write([]byte(item))
buf := bytes.NewBuffer(h.Sum(nil))
var hash1 uint32
var hash2 uint32
binary.Read(buf, binary.LittleEndian, &hash1)
binary.Read(buf, binary.LittleEndian, &hash2)
return hash1, hash2
}
package main
import (
"fmt"
"strings"
"bloomfilter"
)
func main() {
var line = "hello dave how are you mate dave"
words := strings.Split(line, " ")
m := 512 // no. of buckets
bf := bloomfilter.New(m)
fmt.Println(words)
// skipping some words for testing
for _, word := range words[0:3] {
bf.Add(word)
}
for _, word := range words {
fmt.Printf("%s \t %t\n", word, bf.Has(word))
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment