Skip to content

Instantly share code, notes, and snippets.

@ourway
Created December 27, 2015 13:10
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ourway/7fa9aefce098d6377a6d to your computer and use it in GitHub Desktop.
Save ourway/7fa9aefce098d6377a6d to your computer and use it in GitHub Desktop.
Dictionary implementation in pure Python
"""
A Python dict implementation.
"""
import collections
MINSIZE = 8
PERTURB_SHIFT = 5
dummy = "<dummy key>"
class Entry(object):
"""
A hash table entry.
Attributes:
* key - The key for this entry.
* hash - The has of the key.
* value - The value associated with the key.
"""
__slots__ = ("key", "value", "hash")
def __init__(self):
self.key = None
self.value = None
self.hash = 0
def __repr__(self):
return "<Entry: key={0} value={1}>".format(self.key, self.value)
class Dict(object):
"""
A mapping interface implemented as a hash table.
Attributes:
* used - The number of entires used in the table.
* filled - used + number of entries with a dummy key.
* table - List of entries; contains the actual dict data.
* mask - Length of table - 1. Used to fetch values.
"""
__slots__ = ("filled", "used", "mask", "table")
def __init__(self, arg=None, **kwargs):
self.clear()
self._update(arg, kwargs)
@classmethod
def fromkeys(cls, keys, value=0):
"""
Return a new dictionary from a sequence of keys.
"""
d = cls()
for key in keys:
d[key] = value
return d
def clear(self):
"""
Clear the dictionary of all data.
"""
self.filled = 0
self.used = 0
self.mask = MINSIZE - 1
self.table = []
# Initialize the table to a clean slate of entries.
for i in range(MINSIZE):
self.table.append(Entry())
def pop(self, *args):
"""
Remove and return the value for a key.
"""
have_default = len(args) == 2
try:
v = self[args[0]]
except KeyError:
if have_default:
return args[1]
raise
else:
del self[args[0]]
return v
def popitem(self):
"""
Remove and return any key-value pair from the dictionary.
"""
if self.used == 0:
raise KeyError("empty dictionary")
entry0 = self.table[0]
entry = entry0
i = 0
if entry0.value is None:
# The first entry in the table's hash is abused to hold the index to
# the next place to look for a value to pop.
i = entry0.hash
if i > self.mask or i < i:
i = 1
entry = self.table[i]
while entry.value is None:
i += 1
if i > self.mask:
i = 1
entry = self.table[i]
res = entry.key, entry.value
self._del(entry)
# Set the next place to start.
entry0.hash = i + 1
return res
def setdefault(self, key, default=0):
"""
If key is in the dictionary, return it. Otherwise, set it to the default
value.
"""
val = self._lookup(key).value
if val is None:
self[key] = default
return default
return val
def _lookup(self, key):
"""
Find the entry for a key.
"""
key_hash = hash(key)
i = key_hash & self.mask
entry = self.table[i]
if entry.key is None or entry is key:
return entry
free = None
if entry.key is dummy:
free = entry
elif entry.hash == key_hash and key == entry.key:
return entry
perturb = key_hash
while True:
i = (i << 2) + i + perturb + 1;
entry = self.table[i & self.mask]
if entry.key is None:
return entry if free is None else free
if entry.key is key or \
(entry.hash == key_hash and key == entry.key):
return entry
elif entry.key is dummy and free is None:
free = dummy
perturb >>= PERTURB_SHIFT
assert False, "not reached"
def _resize(self, minused):
"""
Resize the dictionary to at least minused.
"""
newsize = MINSIZE
# Find the smalled value for newsize.
while newsize <= minused and newsize > 0:
newsize <<= 1
oldtable = self.table
# Create a new table newsize long.
newtable = []
while len(newtable) < newsize:
newtable.append(Entry())
# Replace the old table.
self.table = newtable
self.used = 0
self.filled = 0
# Copy the old data into the new table.
for entry in oldtable:
if entry.value is not None:
self._insert_into_clean(entry)
elif entry.key is dummy:
entry.key = None
self.mask = newsize - 1
def _insert_into_clean(self, entry):
"""
Insert an item in a clean dict. This is a helper for resizing.
"""
i = entry.hash & self.mask
new_entry = self.table[i]
perturb = entry.hash
while new_entry.key is not None:
i = (i << 2) + i + perturb + 1
new_entry = self.table[i & self.mask]
perturb >>= PERTURB_SHIFT
new_entry.key = entry.key
new_entry.value = entry.value
new_entry.hash = entry.hash
self.used += 1
self.filled += 1
def _insert(self, key, value):
"""
Add a new value to the dictionary or replace an old one.
"""
entry = self._lookup(key)
if entry.value is None:
self.used += 1
if entry.key is not dummy:
self.filled += 1
entry.key = key
entry.hash = hash(key)
entry.value = value
def _del(self, entry):
"""
Mark an entry as free with the dummy key.
"""
entry.key = dummy
entry.value = None
self.used -= 1
def __getitem__(self, key):
value = self._lookup(key).value
if value is None:
# Check if we're a subclass.
if type(self) is not Dict:
# Try to call the __missing__ method.
missing = getattr(self, "__missing__")
if missing is not None:
return missing(key)
raise KeyError("no such key: {0!r}".format(key))
return value
def __setitem__(self, key, what):
# None is used as a marker for empty entries, so it can't be in a
# dictionary.
assert what is not None and key is not None, \
"key and value must not be None"
old_used = self.used
self._insert(key, what)
# Maybe resize the dict.
if not (self.used > old_used and
self.filled*3 >= (self.mask + 1)*2):
return
# Large dictionaries (< 5000) are only doubled in size.
factor = 2 if self.used > 5000 else 4
self._resize(factor*self.used)
def __delitem__(self, key):
entry = self._lookup(key)
if entry.value is None:
raise KeyError("no such key: {0!r}".format(key))
self._del(entry)
def __contains__(self, key):
"""
Check if a key is in the dictionary.
"""
return self._lookup(key).value is not None
def __eq__(self, other):
if not isinstance(other, Dict):
try:
# Try to coerce the other to a Dict, so we can compare it.
other = Dict(other)
except TypeError:
return NotImplemented
if self.used != other.used:
# They're not the same size.
return False
# Look through the table and compare every entry, breaking out early if
# we find a difference.
for entry in self.table:
if entry.value is not None:
try:
bval = other[entry.key]
except KeyError:
return False
if not bval == entry.value:
return False
return True
def __ne__(self, other):
return not self == other
def keys(self):
"""
Return a list of keys in the dictionary.
"""
return [entry.key for entry in self.table if entry.value is not None]
def values(self):
"""
Return a list of values in the dictionary.
"""
return [entry.value for entry in self.table if entry.value is not None]
def items(self):
"""
Return a list of key-value pairs.
"""
return [(entry.key, entry.value) for entry in self.table
if entry.value is not None]
def __iter__(self):
return DictKeysIterator(self)
def itervalues(self):
"""
Return an iterator over the values in the dictionary.
"""
return DictValuesIterator(self)
def iterkeys(self):
"""
Return an iterator over the keys in the dictionary.
"""
return DictKeysIterator(self)
def iteritems(self):
"""
Return an iterator over key-value pairs.
"""
return DictItemsIterator(self)
def _merge(self, mapping):
"""
Update the dictionary from a mapping.
"""
for key in mapping.keys():
self[key] = mapping[key]
def _from_sequence(self, seq):
for double in seq:
if len(double) != 2:
raise ValueError("{0!r} doesn't have a length of 2".format(
double))
self[double[0]] = double[1]
def _update(self, arg, kwargs):
if arg:
if isinstance(arg, collections.Mapping):
self._merge(arg)
else:
self._from_sequence(arg)
if kwargs:
self._merge(kwargs)
def update(self, arg=None, **kwargs):
"""
Update the dictionary from a mapping or sequence containing key-value
pairs. Any existing values are overwritten.
"""
self._update(arg, kwargs)
def get(self, key, default=0):
"""
Return the value for key if it exists otherwise the default.
"""
try:
return self[key]
except KeyError:
return default
def __len__(self):
return self.used
def __repr__(self):
r = ["{0!r} : {1!r}".format(k, v) for k, v in self.iteritems()]
return "Dict({" + ", ".join(r) + "})"
collections.Mapping.register(Dict)
class DictIterator(object):
def __init__(self, d):
self.d = d
self.used = self.d.used
self.len = self.d.used
self.pos = 0
def __iter__(self):
return self
def next(self):
# Check if the dictionary has been mutated under us.
if self.used != self.d.used:
# Make this state permanent.
self.used = -1
raise RuntimeError("dictionary size changed during interation")
i = self.pos
while i <= self.d.mask and self.d.table[i].value is None:
i += 1
self.pos = i + 1
if i > self.d.mask:
# We're done.
raise StopIteration
self.len -= 1
return self._extract(self.d.table[i])
__next__ = next
def _extract(self, entry):
return getattr(entry, self.kind)
def __len__(self):
return self.len
class DictKeysIterator(DictIterator):
kind = "key"
class DictValuesIterator(DictIterator):
kind = "value"
class DictItemsIterator(DictIterator):
def _extract(self, entry):
return entry.key, entry.value
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment