Skip to content

Instantly share code, notes, and snippets.

@luispedro
Created August 7, 2010 22:07
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save luispedro/513251 to your computer and use it in GitHub Desktop.
Save luispedro/513251 to your computer and use it in GitHub Desktop.
Bloom filter in python
import numpy as np
def insert_bit(array, index):
inner = (index % 64)
index //= 64
array[index] |= (1 << inner)
def is_set(array, index):
inner = (index % 64)
index //= 64
return array[index] & (1 << inner)
class BloomFilter(object):
def __init__(self, N, Nbits):
self.Nbits = Nbits
self.bloom = np.zeros(Nbits//64 + bool(Nbits % 64), np.uint64)
self.k = int(np.round(.7 *Nbits/N))
def hash(self, s, index):
return (hash(s + ('x' * index)) % self.Nbits)
def insert(self, s):
for h in xrange(self.k):
insert_bit(self.bloom, self.hash(s, h))
def __contains__(self, s):
for h in xrange(self.k):
if not is_set(self.bloom, self.hash(s, h)):
return False
return True
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment