Created
August 6, 2020 22:08
-
-
Save gerrymanoim/3519567d6afae5361032c032e0cc44f2 to your computer and use it in GitHub Desktop.
Code from Modern "Python Dictionaries A confluence of a dozen great ideas - Raymond Hettinger"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import array | |
import collections | |
import itertools | |
# Placeholder constants | |
FREE = -1 | |
DUMMY = -2 | |
class Dict(collections.MutableMapping): | |
'Space efficient dictionary with fast iteration and cheap resizes.' | |
@staticmethod | |
def _gen_probes(hashvalue, mask): | |
'Same sequence of probes used in the current dictionary design' | |
PERTURB_SHIFT = 5 | |
if hashvalue < 0: | |
hashvalue = -hashvalue | |
i = hashvalue & mask | |
yield i | |
perturb = hashvalue | |
while True: | |
i = (5 * i + perturb + 1) & 0xFFFFFFFFFFFFFFFF | |
yield i & mask | |
perturb >>= PERTURB_SHIFT | |
def _lookup(self, key, hashvalue): | |
'Same lookup logic as currently used in real dicts' | |
assert self.filled < len(self.indices) # At least one open slot | |
freeslot = None | |
for i in self._gen_probes(hashvalue, len(self.indices)-1): | |
index = self.indices[i] | |
if index == FREE: | |
return (FREE, i) if freeslot is None else (DUMMY, freeslot) | |
elif index == DUMMY: | |
if freeslot is None: | |
freeslot = i | |
elif (self.keylist[index] is key or | |
self.hashlist[index] == hashvalue | |
and self.keylist[index] == key): | |
return (index, i) | |
@staticmethod | |
def _make_index(n): | |
'New sequence of indices using the smallest possible datatype' | |
if n <= 2**7: return array.array('b', [FREE]) * n # signed char | |
if n <= 2**15: return array.array('h', [FREE]) * n # signed short | |
if n <= 2**31: return array.array('l', [FREE]) * n # signed long | |
return [FREE] * n # python integers | |
def _resize(self, n): | |
'''Reindex the existing hash/key/value entries. | |
Entries do not get moved, they only get new indices. | |
No calls are made to hash() or __eq__(). | |
''' | |
n = 2 ** n.bit_length() # round-up to power-of-two | |
self.indices = self._make_index(n) | |
for index, hashvalue in enumerate(self.hashlist): | |
for i in Dict._gen_probes(hashvalue, n-1): | |
if self.indices[i] == FREE: | |
break | |
self.indices[i] = index | |
self.filled = self.used | |
def clear(self): | |
self.indices = self._make_index(8) | |
self.hashlist = [] | |
self.keylist = [] | |
self.valuelist = [] | |
self.used = 0 | |
self.filled = 0 # used + dummies | |
def __getitem__(self, key): | |
hashvalue = hash(key) | |
index, i = self._lookup(key, hashvalue) | |
if index < 0: | |
raise KeyError(key) | |
return self.valuelist[index] | |
def __setitem__(self, key, value): | |
hashvalue = hash(key) | |
index, i = self._lookup(key, hashvalue) | |
if index < 0: | |
self.indices[i] = self.used | |
self.hashlist.append(hashvalue) | |
self.keylist.append(key) | |
self.valuelist.append(value) | |
self.used += 1 | |
if index == FREE: | |
self.filled += 1 | |
if self.filled * 3 > len(self.indices) * 2: | |
self._resize(4 * len(self)) | |
else: | |
self.valuelist[index] = value | |
def __delitem__(self, key): | |
hashvalue = hash(key) | |
index, i = self._lookup(key, hashvalue) | |
if index < 0: | |
raise KeyError(key) | |
self.indices[i] = DUMMY | |
self.used -= 1 | |
# If needed, swap with the lastmost entry to avoid leaving a "hole" | |
if index != self.used: | |
lasthash = self.hashlist[-1] | |
lastkey = self.keylist[-1] | |
lastvalue = self.valuelist[-1] | |
lastindex, j = self._lookup(lastkey, lasthash) | |
assert lastindex >= 0 and i != j | |
self.indices[j] = index | |
self.hashlist[index] = lasthash | |
self.keylist[index] = lastkey | |
self.valuelist[index] = lastvalue | |
# Remove the lastmost entry | |
self.hashlist.pop() | |
self.keylist.pop() | |
self.valuelist.pop() | |
def __init__(self, *args, **kwds): | |
if not hasattr(self, 'keylist'): | |
self.clear() | |
self.update(*args, **kwds) | |
def __len__(self): | |
return self.used | |
def __iter__(self): | |
return iter(self.keylist) | |
def iterkeys(self): | |
return iter(self.keylist) | |
def keys(self): | |
return list(self.keylist) | |
def itervalues(self): | |
return iter(self.valuelist) | |
def values(self): | |
return list(self.valuelist) | |
def iteritems(self): | |
return itertools.izip(self.keylist, self.valuelist) | |
def items(self): | |
return zip(self.keylist, self.valuelist) | |
def __contains__(self, key): | |
index, i = self._lookup(key, hash(key)) | |
return index >= 0 | |
def get(self, key, default=None): | |
index, i = self._lookup(key, hash(key)) | |
return self.valuelist[index] if index >= 0 else default | |
def popitem(self): | |
if not self.keylist: | |
raise KeyError('popitem(): dictionary is empty') | |
key = self.keylist[-1] | |
value = self.valuelist[-1] | |
del self[key] | |
return key, value | |
def __repr__(self): | |
return 'Dict(%r)' % self.items() | |
def show_structure(self): | |
'Diagnostic method. Not part of the API.' | |
print '=' * 50 | |
print self | |
print 'Indices:', self.indices | |
for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)): | |
print i, row | |
print '-' * 50 | |
if __name__ == '__main__': | |
d = Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')]) | |
d.show_structure() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Code shown at the end of https://www.youtube.com/watch?v=npw4s1QTmPg&app=desktop
via:
Actual dictionary implementation: https://github.com/python/cpython/blob/master/Objects/dictobject.c