Skip to content

Instantly share code, notes, and snippets.

@tylerwx51
Created June 27, 2018 23:20
Show Gist options
  • Save tylerwx51/e42629d97e74aaab52a41eb3aa48aa45 to your computer and use it in GitHub Desktop.
Save tylerwx51/e42629d97e74aaab52a41eb3aa48aa45 to your computer and use it in GitHub Desktop.
Bloom Blog
class Bloom:
    """Bloom filter stored as a NumPy boolean array.

    The bit positions for an item come from ``double_hash``: ``insert``
    sets those positions and ``is_in`` checks whether all of them are set.
    """

    def __init__(self, m, k):
        """Create a filter with ``m`` cleared bits and ``k`` probes per item."""
        self.k = k
        self.info = np.zeros(m, dtype=bool)

    def _positions(self, x):
        """Return the positions ``double_hash`` assigns to ``x`` for this filter."""
        n_bits = self.info.shape[0]
        return double_hash(x, self.k, n_bits)

    def insert(self, x):
        """Set every bit position associated with ``x``."""
        self.info[self._positions(x)] = True

    def is_in(self, x):
        """Return whether every bit position associated with ``x`` is set."""
        probed = self.info[self._positions(x)]
        return probed.all()

    def __str__(self):
        """Return a readable summary showing the bit array and ``k``."""
        return f'Bloom(info={self.info}, k={self.k})'
def double_hash(delta, k, m):
    """Return ``k`` probe positions for ``delta`` using enhanced double hashing.

    Two base hashes are reduced modulo ``m``; the first probe is the first
    hash, and each later probe adds a running offset that itself grows by
    the loop index.

    Args:
        delta: The item to hash; passed unchanged to ``hash1`` and ``hash2``.
        k: Number of positions to generate.
        m: Modulus applied to every position (the caller passes the table
            size).

    Returns:
        An integer NumPy array of length ``k``. For positive ``m`` and
        integer hash values every entry lies in ``[0, m)``. The array is
        empty when ``k`` is 0.
    """
    f = np.zeros(k, 'int')
    if k == 0:
        # Nothing to fill; writing f[0] on an empty array would raise.
        return f
    # NOTE(review): hash1/hash2 are defined outside this block and are
    # presumed to return integers -- confirm.
    x = hash1(delta) % m
    y = hash2(delta) % m
    # The first probe is the first hash itself. Storing a constant 0 here
    # would make every item hit slot 0 and waste one of the k probes.
    f[0] = x
    for i in range(1, k):
        x = (x + y) % m
        y = (y + i) % m
        f[i] = x
    return f
def triple_hash(delta, k, m):
    """Return ``k`` probe positions for ``delta`` using triple hashing.

    Three base hashes are reduced modulo ``m``; the first probe is the
    first hash, each later probe adds a running offset, and that offset is
    advanced by the third hash on every step.

    Args:
        delta: The item to hash; passed unchanged to ``hash1``, ``hash2``
            and ``hash3``.
        k: Number of positions to generate.
        m: Modulus applied to every position.

    Returns:
        An integer NumPy array of length ``k``. For positive ``m`` and
        integer hash values every entry lies in ``[0, m)``. The array is
        empty when ``k`` is 0.
    """
    f = np.zeros(k, 'int')
    if k == 0:
        # Nothing to fill; writing f[0] on an empty array would raise.
        return f
    # NOTE(review): hash1/hash2/hash3 are defined outside this block and
    # are presumed to return integers -- confirm.
    # Reducing the seeds up front keeps the first probe inside the table;
    # for integer hashes the later probes are unaffected.
    x = hash1(delta) % m
    y = hash2(delta) % m
    z = hash3(delta) % m
    # The first probe is the first hash itself. Storing a constant 0 here
    # would make every item hit slot 0 and waste one of the k probes.
    f[0] = x
    for i in range(1, k):
        x = (x + y) % m
        y = (y + z) % m
        f[i] = x
    return f
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment