Skip to content

Instantly share code, notes, and snippets.

@iEverX
Last active December 22, 2015 17:29
Show Gist options
  • Save iEverX/6506500 to your computer and use it in GitHub Desktop.
Save iEverX/6506500 to your computer and use it in GitHub Desktop.
Simple bloom filter, just for learning. Need bitarray support
# -*- coding: utf-8 -*-
import os
from hashlib import md5
from bitarray import bitarray
class BloomFilter(object):
def __init__(self, count, size, hash_count=8):
self.count = count
self.size = size
self.bits_length = count * size
self.bits = bitarray(self.bits_length)
self.bits.setall(0)
self._seeds = [os.urandom(8) for x in xrange(hash_count)]
def add(self, v):
for seed in self._seeds:
pos = self._alloc(self._hash(v, seed))
self.bits[pos] = 1
def check(self, v):
for seed in self._seeds:
pos = self._alloc(self._hash(v, seed))
if not self.bits[pos]:
return False
return True
def _hash(self, v, seed):
m = md5(seed)
m.update(v)
return int(m.hexdigest(), 16)
def _alloc(self, hash):
return hash % self.bits_length
def _print(self):
print self.bits
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment