Skip to content

Instantly share code, notes, and snippets.

@sing1ee
Created July 11, 2012 02:15
Show Gist options
  • Save sing1ee/3087515 to your computer and use it in GitHub Desktop.
Save sing1ee/3087515 to your computer and use it in GitHub Desktop.
implement dict by murmur hash
# !/usr/bin/python
# -*- encoding: utf-8 -*-
import mmh3
class MurmurDict:
def __init__(self, capacity = 16, load_factor = 0.75):
self.capacity = capacity
self.load_factor = load_factor
self.table = []
for i in range(self.capacity):
self.table.append([])
self.rate = 0.1
self.size = 0
def __hash_index__(self, hash_code = 1):
return hash_code & (self.capacity - 1)
def __resize__(self):
self.capacity = self.capacity + int(self.capacity / 2)
new_table = []
for i in range(self.capacity):
new_table.append([])
for item in self.table:
for (key, value) in item:
new_table[self.__hash_index__(mmh3.hash(key))].append((key, value))
self.table = new_table
def check_size(self):
return self.size * 1.0 / self.capacity > self.load_factor
def put(self, key, value):
self.size += 1
if self.check_size():
self.__resize__()
idx = self.__hash_index__(mmh3.hash(key))
print 'idx=%d\tsize=%d\tcapacity=%d' %(idx, self.size, self.capacity)
self.table[idx].append((key, value))
return
def contains(self, key):
list = self.table[self.__hash_index__(mmh3.hash(key))]
return len(list) != 0
def get(self, lookup_key):
list = self.table[self.__hash_index__(mmh3.hash(lookup_key))]
for (key, value) in list:
if key == lookup_key:
return value
return None
def size(self):
return self.size
if __name__ == '__main__':
murmur_dict = MurmurDict()
for item in range(100):
key = '%d' %item
murmur_dict.put(key, item)
for item in range(100):
key = '%d' %item
print murmur_dict.get(key)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment