Skip to content

Instantly share code, notes, and snippets.

@luminousmen
Last active January 31, 2023 22:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luminousmen/f7969aa196af41b9979087e2e738d0f2 to your computer and use it in GitHub Desktop.
Save luminousmen/f7969aa196af41b9979087e2e738d0f2 to your computer and use it in GitHub Desktop.
Sample bloom filter implementations
# Hash implementations from
# http://www.partow.net/programming/hashfunctions/index.html
# Author: @luminousmen
class BloomFilter:
def __init__(self, size: int):
self.size = size
self.bitmap = size * [0]
def FNV(self, key: str):
fnv_prime = 0x811C9DC5
hash = 0
for i in range(len(key)):
hash *= fnv_prime
hash ^= ord(key[i])
return hash
def DJB(self, key: str):
hash = 5381
for i in range(len(key)):
hash = ((hash << 5) + hash) + ord(key[i])
return hash
def calculate_hash(self, value):
# FNV
hash1 = self.FNV(value) % self.size
# MurMur
hash2 = self.DJB(value) % self.size
return hash1, hash2
def add(self, value: str):
for h in self.calculate_hash(value):
self.bitmap[h] = 1
def check(self, value):
for h in self.calculate_hash(value):
if not self.bitmap[h]:
return False
return True
if __name__ == "__main__":
coffees = [
"Iced Coffee",
"Iced Coffee with Milk",
"Espresso",
"Espresso Macchiato",
"Flat White",
"Latte Macchiato",
"Americano",
"Mocha",
]
bloom = BloomFilter(20)
for drink in coffees:
bloom.add(drink)
print(bloom.bitmap)
print(bloom.check("Flat White"))
print(bloom.check("Cappuccino"))
import hashlib
class BloomFilter:
def __init__(self, size, num_hashes, salt=None):
self.size = size
self.num_hashes = num_hashes
self.salt = salt or ''
self.bit_array = [0] * size
def add(self, element):
for i in range(self.num_hashes):
digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest()
index = int(digest, 16) % self.size
self.bit_array[index] = 1
def __contains__(self, element):
for i in range(self.num_hashes):
digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest()
index = int(digest, 16) % self.size
if self.bit_array[index] == 0:
return False
return True
for h in self.calculate_hash(value):
if not self.bitmap[h]:
return False
return True
if __name__ == "__main__":
coffees = [
"Iced Coffee",
"Iced Coffee with Milk",
"Espresso",
"Espresso Macchiato",
"Flat White",
"Latte Macchiato",
"Cappuccino",
"Mocha",
]
bloom = BloomFilter(20, 2)
for drink in coffees:
bloom.add(drink)
print(bloom.bit_array)
print("Flat White" in bloom)
print("Americano" in bloom)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment