Skip to content

Instantly share code, notes, and snippets.

@pyrocat101
Created October 8, 2014 00:37
Show Gist options
  • Save pyrocat101/f36240f8cdb4bee8356b to your computer and use it in GitHub Desktop.
Save pyrocat101/f36240f8cdb4bee8356b to your computer and use it in GitHub Desktop.
Naïve implementation of chained hash table, just for proof of concept.
class Bucket(object):
def __init__(self):
self.next = None
class Entry(object):
def __init__(self, key, val):
self.key = key
self.val = val
self.next = None
class Hashtbl(object):
def __init__(self, iterable=None):
self._size = 16L
self._length = 0
self._buckets = [Bucket() for _ in range(self._size)]
if iterable is not None:
for k, v in iterable:
self.__setitem__(k, v)
def __setitem__(self, key, val):
bucket = self._buckets[hash(key) % self._size]
# search entries in the bucket
current = bucket
while current.next is not None:
next_ = current.next
if next_.key == key:
# modify in-place and return
next_.val = val
return
current = current.next
# need to add new entry
if self._length == self._size:
self._expand()
# bucket changed after rehashing
bucket = self._buckets[hash(key) % self._size]
self._add_to_bucket(bucket, key, val)
self._length += 1
def _add_to_bucket(self, bucket, key, val):
# insert new entry in front of the entry list
tail = bucket.next
entry = Entry(key, val)
entry.next = tail
bucket.next = entry
def _expand(self):
# rehashing of all items
new_size = self._size * 2
new_buckets = [Bucket() for _ in range(new_size)]
for k, v in self.iteritems():
bucket = new_buckets[hash(k) % new_size]
self._add_to_bucket(bucket, k, v)
# replace bucket
self._buckets = new_buckets
self._size = new_size
def iteritems(self):
for bucket in self._buckets:
current = bucket.next
while current is not None:
yield current.key, current.val
current = current.next
def iterkeys(self):
for k, _ in self.itervalues():
yield k
def itervalues(self):
for _, v in self.itervalues():
yield v
def items(self):
return list(self.iteritems())
def keys(self):
return list(self.iterkeys())
def values(self):
return list(self.itervalues())
def __iter__(self):
for item in self.iterkeys():
yield item
def _get_entry(self, key):
bucket = self._buckets[hash(key) % self._size]
# search entries in the bucket
current = bucket.next
while current is not None:
if current.key == key:
return current
current = current.next
else:
return None
def __getitem__(self, key):
entry = self._get_entry(key)
if entry is not None:
return entry.val
else:
raise KeyError(key)
def __contains__(self, key):
return self._get_entry(key) is not None
def __delitem__(self, key):
bucket = self._buckets[hash(key) % self._size]
current = bucket
while current.next is not None:
next_ = current.next
if next_.key == key:
current.next = next_.next
self._length -= 1
return
current = current.next
raise KeyError(key)
def get(self, key, default=None):
entry = self._get_entry(key)
if entry is not None:
return entry.val
else:
return default
def __len__(self):
return self._length
import random
# randomized test
for _ in range(100):
ht = Hashtbl()
num_entries = random.randint(1, 500)
kvs = [(random.random(), random.random()) for _ in range(num_entries)]
for k, v in kvs:
ht[k] = v
# test get items
for k, v in kvs:
assert k in ht
assert (k + 1) not in ht
assert ht[k] == v
# in-place modify
for k, v in kvs:
ht[k] += 1
assert ht[k] == v + 1
# test deletion
for k, _ in kvs:
del ht[k]
assert k not in ht
assert len(ht) == 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment