Skip to content

Instantly share code, notes, and snippets.

@tippenein
Last active December 19, 2015 12:09
Show Gist options
  • Save tippenein/5952751 to your computer and use it in GitHub Desktop.
Save tippenein/5952751 to your computer and use it in GitHub Desktop.
slightly ugly, but useful bloomfilter
from random import Random
class BloomFilter:
# http://en.wikipedia.org/wiki/Bloom_filter
def __init__(self, num_bytes, num_probes, iterable=set()):
self.array = bytearray(num_bytes)
self.num_probes = num_probes
self.num_bins = num_bytes * 8
self.update(iterable)
self.iterable = iterable
def _add(self, value):
self.iterable.add(value)
self.update(self.iterable)
def get_probes(self, key):
random = Random(key).random
return (int(random() * self.num_bins) for _ in range(self.num_probes))
def update(self, keys):
for key in keys:
for i in self.get_probes(key):
self.array[i//8] |= 2 ** (i%8)
def __contains__(self, key):
return all(self.array[i//8] & (2 ** (i%8)) for i in self.get_probes(key))
if __name__ == "__main__":
s = set([x for x in xrange(0,1001)])
bloom = BloomFilter(100,3, iterable=s)
bloom._add(1001)
result = 1001 in bloom
print result
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment