Skip to content

Instantly share code, notes, and snippets.

@sevagh
Created January 7, 2020 03:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sevagh/cb96fccdec4369d3f10f1198a2ac93ab to your computer and use it in GitHub Desktop.
Save sevagh/cb96fccdec4369d3f10f1198a2ac93ab to your computer and use it in GitHub Desktop.
basic Bloom Filter with 4 hashing functions
use bit_vec::BitVec;
use std::hash::{Hash, Hasher};
// use 4 hash functions
use ahash::AHasher;
use fnv::FnvHasher;
use metrohash::MetroHash64;
use siphasher::sip::SipHasher;
pub struct BloomFilter {
bits: BitVec,
k: usize,
hash_builders: Vec<fn() -> Box<dyn Hasher>>,
}
fn builder1() -> Box<dyn Hasher> {
Box::new(FnvHasher::default())
}
fn builder2() -> Box<dyn Hasher> {
Box::new(SipHasher::new())
}
fn builder3() -> Box<dyn Hasher> {
Box::new(AHasher::default())
}
fn builder4() -> Box<dyn Hasher> {
Box::new(MetroHash64::new())
}
impl BloomFilter {
pub fn with_capacity(k: usize) -> Self {
BloomFilter {
k,
bits: BitVec::from_elem(k, false),
hash_builders: vec![builder1, builder2, builder3, builder4],
}
}
pub fn insert<T: Hash>(&mut self, val: T) {
for h in self.hash_builders.iter() {
let mut hasher = h();
val.hash(&mut hasher);
self.bits.set((hasher.finish() as usize) % self.k, true);
}
}
pub fn test<T: Hash>(&self, val: T) -> bool {
let mut membership = true;
for h in self.hash_builders.iter() {
let mut hasher = h();
val.hash(&mut hasher);
membership &= self.bits.get((hasher.finish() as usize) % self.k).unwrap();
if !membership {
return membership;
}
}
membership
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn basic_insert() {
let mut bm = BloomFilter::with_capacity(10);
let val = "hello world";
bm.insert(val);
assert!(bm.test(val));
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment