Last active
February 18, 2024 15:19
-
-
Save moosejaw/5a94fde7ffd81746fff3e4f948311557 to your computer and use it in GitHub Desktop.
Go: Basic Bloom filter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"crypto" | |
_ "crypto/md5" | |
_ "crypto/sha256" | |
"encoding/hex" | |
"errors" | |
"fmt" | |
"math" | |
"strconv" | |
) | |
type BloomFilter struct { | |
m uint // array size. | |
k uint // number of hash functions to be used when inserting. | |
n uint // counter for number of elements inserted. | |
arr []bool // bool array of size m to hold the bit values. | |
} | |
func (bf *BloomFilter) hash(s string, internalHashFunc crypto.Hash) (int64, error) { | |
// Start by hashing the string with the given function. | |
hash := internalHashFunc.New() | |
hash.Write([]byte(s)) | |
// Now convert it to hex so that we can perform arithmetic operations. | |
hx := hex.EncodeToString(hash.Sum(nil))[:15] // slicing stops out of range errors from ParseInt | |
res, err := strconv.ParseInt(hx, 16, 64) | |
if err != nil { | |
return 0, err | |
} | |
calc := func(x int64) int64 { return int64(math.Round(math.Pow(float64(res), 0.612))) % int64(bf.m) } | |
out := calc(res) | |
return out, nil | |
} | |
func (bf *BloomFilter) calcFalsePositiveProb() float64 { | |
// (1−(1−1/m)^(kn))^k | |
inner := 1.0 - float64(1.0/float64(bf.m)) | |
outer := math.Pow(inner, float64(bf.k*bf.n)) | |
return math.Pow(outer, float64(bf.k)) | |
} | |
func (bf *BloomFilter) Add(s string) { | |
fmt.Printf("add: ") | |
var h crypto.Hash | |
var f string | |
for i := 0; i < int(bf.k); i++ { | |
if i == 0 { | |
h = crypto.MD5 | |
f = "md5" | |
} else { | |
h = crypto.SHA256 | |
f = "sha256" | |
} | |
idx, err := bf.hash(s, h) | |
if err != nil { | |
fmt.Print(err) | |
} else { | |
s := "" | |
if i == 1 { | |
s = ", " | |
} | |
fmt.Printf("%shash %d (%s) : idx %2d ", s, i, f, idx) | |
} | |
bf.arr[idx] = true | |
} | |
bf.n++ | |
fmt.Printf(" ; n %d\n", bf.n) | |
} | |
func (bf *BloomFilter) Search(s string) { | |
var h crypto.Hash | |
for i := 0; i < int(bf.k); i++ { | |
if i == 0 { | |
h = crypto.MD5 | |
} else { | |
h = crypto.SHA256 | |
} | |
idx, err := bf.hash(s, h) | |
if err != nil { | |
fmt.Print(err) | |
return | |
} | |
if !bf.arr[idx] { | |
fmt.Printf("search: %32s : NOT IN filter\n", s) | |
return | |
} | |
} | |
prob := bf.calcFalsePositiveProb() | |
fmt.Printf( | |
"search: %32s : IN filter (false pos approx %%: %f , # n: %d)\n", | |
s, prob, bf.n, | |
) | |
} | |
func (bf *BloomFilter) Show() { | |
for i := range bf.arr { | |
if !bf.arr[i] { | |
fmt.Print("[0] ") | |
} else { | |
fmt.Print("[1] ") | |
} | |
} | |
} | |
func initBloomFilter(m uint, k uint) (BloomFilter, error) { | |
bf := &BloomFilter{} | |
if k > 2 || k == 0 { | |
return *bf, errors.New("k cannot be 0 or greater than 2") | |
} | |
if m > 64 { | |
return *bf, errors.New("m cannot be greater than 64 (for brevity)") | |
} | |
bf.m = m | |
bf.k = k | |
bf.n = 0 | |
bf.arr = make([]bool, bf.m) | |
return *bf, nil | |
} | |
func main() { | |
const m uint = 64 | |
const k uint = 2 | |
filter, err := initBloomFilter(m, k) | |
if err != nil { | |
return | |
} | |
filter.Add("Hello world!") | |
filter.Add("Test12345") | |
filter.Add("Neither is this..!") | |
filter.Add("Neither is this..!!!1") | |
filter.Add("Not in filter...") | |
filter.Add("abcHello world!") | |
filter.Search("Not in filter.") | |
filter.Search("Neither is this.") | |
filter.Search("Hello world!") | |
filter.Search("Test12345") | |
filter.Search("Neither is this.werwer.!") | |
filter.Search("Neither is thiswerfwer..!!!1") | |
filter.Search("Not in filter..werwer.") | |
filter.Add("Not in filter..werwer.") | |
filter.Add("Adding some more phrases.") | |
filter.Add("And more.") | |
filter.Search("Hello world!") | |
filter.Search("Test12345") | |
filter.Show() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment