Skip to content

Instantly share code, notes, and snippets.

@markostam
Last active December 2, 2016 21:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markostam/9150e6c27a758579ba07903bc0e96ecb to your computer and use it in GitHub Desktop.
Save markostam/9150e6c27a758579ba07903bc0e96ecb to your computer and use it in GitHub Desktop.
from bitarray import bitarray
class BloomFilter:
def __init__(self):
# use bitarray instead of list to save space
self.ba = bitarray(10**8)
def _hash_fxn1(self, x):
if type(x) == str:
b = format(ord(x), 'b')
else:
b = format(x, 'b')
y = (i[1] for i in enumerate(b) if i[0]%2 == 0)
y = int(''.join(y),2)
return y%11
def _hash_fxn2(self, x):
if type(x) == str:
b = (ord(x), 'b')
else:
b = format(x, 'b')
y = (i[1] for i in enumerate(b) if i[0]%2 != 0)
y = int(''.join(y),2)
return y%11
def _increment_ba(self, x):
self.ba[self._hash_fxn1(x)] = 1
self.ba[self._hash_fxn2(x)] = 1
def write(self, x):
for i in x:
self._increment_ba(i)
def check(self, x):
str_len = len(str(x))
total = 0
for i in x:
total += int(self.ba[self._hash_fxn2(i)]) + int(self.ba[self._hash_fxn1(i)])
if total/str_len/2 == 1:
print("Item is in the bloom filter")
else:
print("Item is not in the bloom filter")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment