Skip to content

Instantly share code, notes, and snippets.

@quiver
Created September 29, 2012 02:29
Show Gist options
  • Save quiver/3802946 to your computer and use it in GitHub Desktop.
Save quiver/3802946 to your computer and use it in GitHub Desktop.
counting bloom filter
# http://en.wikipedia.org/wiki/Bloom_filter#Counting_filters
# http://www.michaelnielsen.org/ddi/why-bloom-filters-work-the-way-they-do/
import hashlib
class BloomFilter(object):
def __init__(self, m, k):
self.m = m
self.k = k
self.array = [0 for i in range(m)]
#import scipy # if scipy installed
#self.array = scipy.zeros(m, dtype=int)
def __str__(self):
return self.array.__str__()
def add(self, key):
for i in self._hashes(key):
self.array[i] += 1
def remove(self, key):
if self.contains(key):
for i in self._hashes(key):
self.array[i] -= 1
def contains(self, key):
return all(self.array[i] for i in self._hashes(key))
def _hashes(self, key):
"""
generate k different hashes, each of which maps a key to one of
the m array positions with a uniform random distribution
"""
h = hashlib.new('md5')
h.update(str(key))
x = 0
for _ in xrange(self.k):
if x < self.m:
h.update('.')
x = long(h.hexdigest(), 16)
x, y = divmod(x, self.m)
yield y
def test_bloom(m, k, n):
b = BloomFilter(m, k)
for i in xrange(n):
b.add(i)
assert b.contains(i)
print b
b.remove(3)
print b
if __name__ == '__main__':
test_bloom(100, 3, 10)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment