Skip to content

Instantly share code, notes, and snippets.

@rohitdholakia
Created February 21, 2014 05:58
Show Gist options
  • Save rohitdholakia/9129519 to your computer and use it in GitHub Desktop.
Save rohitdholakia/9129519 to your computer and use it in GitHub Desktop.
Bloom Filter - 2** 12
import mmh3
import sys
import os
import math
with open(sys.argv[1]) as usernames:
MAX = 2 ** 12
bitarray = [0 for i in xrange(MAX)]
for line in usernames:
line = line.rstrip()
hash1 = mmh3.hash(line, 0)
hash2 = mmh3.hash(line, hash1)
for i in xrange(5):
bitarray[int(math.fabs(hash1 + i * hash2) % MAX)] = 1
def getPresence(key):
hash1 = mmh3.hash(key, 0)
hash2 = mmh3.hash(key, hash1)
for i in xrange(5):
if bitarray[int(math.fabs(hash1 + i*hash2) % MAX)] == 0:
return False
return True
with open(sys.argv[2]) as test_file:
for line in test_file:
print 'Is ', line.rstrip(), ' present in the ',getPresence(line.rstrip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment