Skip to content

Instantly share code, notes, and snippets.

@KatrinaE
Created April 10, 2014 01:26
Show Gist options
  • Save KatrinaE/10335539 to your computer and use it in GitHub Desktop.
Save KatrinaE/10335539 to your computer and use it in GitHub Desktop.
INIT_SIZE = 10
MAX_AVG_COLLISIONS = 1
FREQ_OF_COLLISIONS_CHECK = 0.67
class HashTable(object):
def __init__(self):
self.arr = self._create_array(INIT_SIZE)
self.insert_counter = 0
def _create_array(self, size):
return [[] for x in range(size)]
def __len__(self):
return len([x for x in self.arr if x is not None])
def insert(self, key, value):
hashed_index = self._hash_index(key)
self.arr[hashed_index].append((key,value))
self._handle_collisions()
def lookup(self, key):
hashed_index = self._hash_index(key)
collision_index = self._sequential_search(self.arr[hashed_index], key)
return self.arr[hashed_index][collision_index][1]
def delete(self, key):
hashed_index = self._hash_index(key)
try:
collision_index = self._sequential_search(self.arr[hashed_index], key)
del self.arr[hashed_index][collision_index]
except KeyError:
pass
def _handle_collisions(self):
self.insert_counter += 1
if self.insert_counter > len(self.arr):
if self._avg_collision_length() > MAX_AVG_COLLISIONS:
self._resize()
def _avg_collision_length(self):
return sum([len(x) for x in self.arr])/len(self.arr)
def _resize(self):
print "Avg # of collision before resize: " + str(self._avg_collision_length())
new_arr = self._create_array(len(self.arr)*2)
tmp_arr = self.arr
self.arr = new_arr
for collision_array in tmp_arr:
for item in collision_array:
self.insert(*item)
self.insert_counter = 0
print "Avg # of collision after resize: " + str(self._avg_collision_length())
def _sequential_search(self, collisions, key):
for i, x in enumerate(collisions):
if x[0] == key:
return i
else:
raise KeyError("Item not found")
def _hash_index(self, key):
hashed_key = current_hash(key)
hashed_index = hashed_key % len(self.arr)
return hashed_index
def test():
x = HashTable()
x.insert('foo','hi')
print x.lookup('foo') == 'hi'
x.insert('ofo','1')
print x.lookup('foo') == 'hi'
print x.lookup('ofo') == '1'
x.delete('ofo')
try:
x.lookup('ofo')
except KeyError:
print 'delete succeeded'
print x.delete('ofo') == None
print x.lookup('foo') == 'hi'
def test2():
x = HashTable()
x.insert('fooamskdlfalfkmds','foo')
x.insert('ofoasdddddddddddddfffff','ofo')
x.insert('oofcccccccccccccccc','oof')
x.insert('dddddddddddddd','bar')
x.insert('eeeeeeeeeeee','baz')
x.insert('fffffffffffffffffff','non')
x.insert('ggggggggggggggggggg','xyz')
x.insert('fooamskylfalfkmys','foo')
x.insert('ofoasyyyyyyyyyyyyyfffff','ofo')
x.insert('oofqqqqqqqqqqqqqqqq','oof')
x.insert('yyyyyyyyyyyyyy','ear')
x.insert('eeeadsfdsfeeeeeeeee','eaz')
x.insert('fffsadfadsfffffffffffffffff','non')
x.insert('aaaaaaaaaaaaaaaaaaa','xyz')
x.insert('fooamskdlfmmds','foo')
x.insert('ofdddddddfffff','ofo')
x.insert('oofcccccccqqqqccccccc','oof')
x.insert('ddddccccccddddd','bar')
x.insert('eeeewweeeeee','baz')
x.insert('ffffffeeeeeeeewefffffffff','non')
x.insert('gggggwwwwwgggwgggggg','xyz')
x.insert('ddddccccccddwddad','bar')
x.insert('eeeewweeeeawaaee','baz')
x.insert('ffffffeeeeweeeeaaaaefffffffff','non')
x.insert('gggggwwwwwgggggggsaaaagg','xyz')
x.insert('fooamskylfalfkmyss','foo')
x.insert('ofoasyyyyyyyyyysyyyfffff','ofo')
x.insert('oofqqqqqqqqqqqsqqqqq','oof')
x.insert('yyyyyyyyyyyyysy','ear')
x.insert('eeeadsfdsfeeeeseeeee','eaz')
x.insert('fffsadfadsfffsffffffffffffff','non')
x.insert('aaaaaaaaaaaasaaaaaaa','xyz')
x.insert('fooamskdlfmsmds','foo')
x.insert('ofdddddddfsffff','ofo')
x.insert('oofccccccscqqqqccccccc','oof')
x.insert('ddddccccccdsdddd','bar')
x.insert('eeeewweeeesee','baz')
x.insert('ffffffeeeeeeeseefffffffff','non')
x.insert('gggggwwwwwgggsgggggg','xyz')
x.insert('ddddccccccddsddad','bar')
x.insert('eeeewweeeeaasaee','baz')
x.insert('ffffffeeeeeeeeaaaaesfffffffff','non')
x.insert('gggggwwwwwgggggggaasaagg','xyz')
def current_hash(s):
return my_djb2(s)
def dumb_hash(s):
return sum([ord(p) for p in s])
def my_djb2(s):
result = 5381
for c in s:
result = ((result << 5) + result) + ord(c)
return result
if __name__ == '__main__':
test2()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment