Skip to content

Instantly share code, notes, and snippets.

@moosejaw
Last active February 18, 2024 15:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save moosejaw/5a94fde7ffd81746fff3e4f948311557 to your computer and use it in GitHub Desktop.
Save moosejaw/5a94fde7ffd81746fff3e4f948311557 to your computer and use it in GitHub Desktop.
Go: Basic Bloom filter
package main
import (
"crypto"
_ "crypto/md5"
_ "crypto/sha256"
"encoding/hex"
"errors"
"fmt"
"math"
"strconv"
)
type BloomFilter struct {
m uint // array size.
k uint // number of hash functions to be used when inserting.
n uint // counter for number of elements inserted.
arr []bool // bool array of size m to hold the bit values.
}
func (bf *BloomFilter) hash(s string, internalHashFunc crypto.Hash) (int64, error) {
// Start by hashing the string with the given function.
hash := internalHashFunc.New()
hash.Write([]byte(s))
// Now convert it to hex so that we can perform arithmetic operations.
hx := hex.EncodeToString(hash.Sum(nil))[:15] // slicing stops out of range errors from ParseInt
res, err := strconv.ParseInt(hx, 16, 64)
if err != nil {
return 0, err
}
calc := func(x int64) int64 { return int64(math.Round(math.Pow(float64(res), 0.612))) % int64(bf.m) }
out := calc(res)
return out, nil
}
func (bf *BloomFilter) calcFalsePositiveProb() float64 {
// (1−(1−1/m)^(kn))^k
inner := 1.0 - float64(1.0/float64(bf.m))
outer := math.Pow(inner, float64(bf.k*bf.n))
return math.Pow(outer, float64(bf.k))
}
func (bf *BloomFilter) Add(s string) {
fmt.Printf("add: ")
var h crypto.Hash
var f string
for i := 0; i < int(bf.k); i++ {
if i == 0 {
h = crypto.MD5
f = "md5"
} else {
h = crypto.SHA256
f = "sha256"
}
idx, err := bf.hash(s, h)
if err != nil {
fmt.Print(err)
} else {
s := ""
if i == 1 {
s = ", "
}
fmt.Printf("%shash %d (%s) : idx %2d ", s, i, f, idx)
}
bf.arr[idx] = true
}
bf.n++
fmt.Printf(" ; n %d\n", bf.n)
}
func (bf *BloomFilter) Search(s string) {
var h crypto.Hash
for i := 0; i < int(bf.k); i++ {
if i == 0 {
h = crypto.MD5
} else {
h = crypto.SHA256
}
idx, err := bf.hash(s, h)
if err != nil {
fmt.Print(err)
return
}
if !bf.arr[idx] {
fmt.Printf("search: %32s : NOT IN filter\n", s)
return
}
}
prob := bf.calcFalsePositiveProb()
fmt.Printf(
"search: %32s : IN filter (false pos approx %%: %f , # n: %d)\n",
s, prob, bf.n,
)
}
func (bf *BloomFilter) Show() {
for i := range bf.arr {
if !bf.arr[i] {
fmt.Print("[0] ")
} else {
fmt.Print("[1] ")
}
}
}
func initBloomFilter(m uint, k uint) (BloomFilter, error) {
bf := &BloomFilter{}
if k > 2 || k == 0 {
return *bf, errors.New("k cannot be 0 or greater than 2")
}
if m > 64 {
return *bf, errors.New("m cannot be greater than 64 (for brevity)")
}
bf.m = m
bf.k = k
bf.n = 0
bf.arr = make([]bool, bf.m)
return *bf, nil
}
func main() {
const m uint = 64
const k uint = 2
filter, err := initBloomFilter(m, k)
if err != nil {
return
}
filter.Add("Hello world!")
filter.Add("Test12345")
filter.Add("Neither is this..!")
filter.Add("Neither is this..!!!1")
filter.Add("Not in filter...")
filter.Add("abcHello world!")
filter.Search("Not in filter.")
filter.Search("Neither is this.")
filter.Search("Hello world!")
filter.Search("Test12345")
filter.Search("Neither is this.werwer.!")
filter.Search("Neither is thiswerfwer..!!!1")
filter.Search("Not in filter..werwer.")
filter.Add("Not in filter..werwer.")
filter.Add("Adding some more phrases.")
filter.Add("And more.")
filter.Search("Hello world!")
filter.Search("Test12345")
filter.Show()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment