Last active
January 31, 2023 22:40
-
-
Save luminousmen/f7969aa196af41b9979087e2e738d0f2 to your computer and use it in GitHub Desktop.
Sample bloom filter implementations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hash implementations from | |
# http://www.partow.net/programming/hashfunctions/index.html | |
# Author: @luminousmen | |
class BloomFilter: | |
def __init__(self, size: int): | |
self.size = size | |
self.bitmap = size * [0] | |
def FNV(self, key: str): | |
fnv_prime = 0x811C9DC5 | |
hash = 0 | |
for i in range(len(key)): | |
hash *= fnv_prime | |
hash ^= ord(key[i]) | |
return hash | |
def DJB(self, key: str): | |
hash = 5381 | |
for i in range(len(key)): | |
hash = ((hash << 5) + hash) + ord(key[i]) | |
return hash | |
def calculate_hash(self, value): | |
# FNV | |
hash1 = self.FNV(value) % self.size | |
# MurMur | |
hash2 = self.DJB(value) % self.size | |
return hash1, hash2 | |
def add(self, value: str): | |
for h in self.calculate_hash(value): | |
self.bitmap[h] = 1 | |
def check(self, value): | |
for h in self.calculate_hash(value): | |
if not self.bitmap[h]: | |
return False | |
return True | |
if __name__ == "__main__": | |
coffees = [ | |
"Iced Coffee", | |
"Iced Coffee with Milk", | |
"Espresso", | |
"Espresso Macchiato", | |
"Flat White", | |
"Latte Macchiato", | |
"Americano", | |
"Mocha", | |
] | |
bloom = BloomFilter(20) | |
for drink in coffees: | |
bloom.add(drink) | |
print(bloom.bitmap) | |
print(bloom.check("Flat White")) | |
print(bloom.check("Cappuccino")) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import hashlib | |
class BloomFilter: | |
def __init__(self, size, num_hashes, salt=None): | |
self.size = size | |
self.num_hashes = num_hashes | |
self.salt = salt or '' | |
self.bit_array = [0] * size | |
def add(self, element): | |
for i in range(self.num_hashes): | |
digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest() | |
index = int(digest, 16) % self.size | |
self.bit_array[index] = 1 | |
def __contains__(self, element): | |
for i in range(self.num_hashes): | |
digest = hashlib.sha1((self.salt + str(element) + str(i)).encode('utf-8')).hexdigest() | |
index = int(digest, 16) % self.size | |
if self.bit_array[index] == 0: | |
return False | |
return True | |
for h in self.calculate_hash(value): | |
if not self.bitmap[h]: | |
return False | |
return True | |
if __name__ == "__main__": | |
coffees = [ | |
"Iced Coffee", | |
"Iced Coffee with Milk", | |
"Espresso", | |
"Espresso Macchiato", | |
"Flat White", | |
"Latte Macchiato", | |
"Cappuccino", | |
"Mocha", | |
] | |
bloom = BloomFilter(20, 2) | |
for drink in coffees: | |
bloom.add(drink) | |
print(bloom.bit_array) | |
print("Flat White" in bloom) | |
print("Americano" in bloom) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment