Skip to content

Instantly share code, notes, and snippets.

@abersheeran
Last active March 25, 2022 06:00
Show Gist options
  • Save abersheeran/210f5c1a6f36721302f755e39a242e50 to your computer and use it in GitHub Desktop.
Save abersheeran/210f5c1a6f36721302f755e39a242e50 to your computer and use it in GitHub Desktop.
Bloom filter by python3
import math
from hashlib import blake2b
class BloomFilter:
def __init__(self, elements_count: int, *, error_rate: float = 0.0001) -> None:
byte_count = 1 + int(
-(math.log(error_rate) / math.log(2) ** 2) * elements_count // 8
)
digest_size = int(math.log2(elements_count)) + (elements_count % 2)
self.bucket = bytearray(byte_count)
self.hash_functions = [
(lambda salt: (lambda x: (
int.from_bytes(
blake2b(x, digest_size=digest_size, salt=salt).digest(), "big",
)
% (byte_count * 8)
)))(salt)
for salt in [
i.to_bytes(1, "big")
for i in range(int(-math.log(error_rate) // math.log(2)) + 1)
]
]
def add(self, value: bytes) -> None:
hashs = [hash_fn(value) for hash_fn in self.hash_functions]
for num in hashs:
self.bucket[num // 8] |= 2 ** (num % 8)
def exists(self, value: bytes) -> bool:
hashs = [hash_fn(value) for hash_fn in self.hash_functions]
for num in hashs:
if not self.bucket[num // 8] & (2 ** (num % 8)):
return False
return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment