Last active
August 29, 2015 14:17
-
-
Save HarryR/b54c2ac81a53f500756d to your computer and use it in GitHub Desktop.
Hashed tree with 4bit levels and random distribution
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__all__ = ('treedel', 'treemap', 'treeadd') | |
from hashlib import sha1 | |
def hashname(*args): | |
hashed = sha1() | |
for arg in args: | |
hashed.update(str(arg)) | |
return hashed.hexdigest() | |
def treedel(tree, name): | |
hashed_name = hashname(name) | |
prefix = '' | |
prev = [] | |
while type(tree) is dict: | |
bucket = hashed_name[:1] | |
if bucket not in tree: | |
return False | |
if type(tree[bucket]) is dict: | |
if len(hashed_name) <= 1: | |
raise RuntimeError("Tree too deep...") | |
prev.append((bucket,tree)) | |
tree = tree[bucket] | |
hashed_name = hashed_name[1:] | |
prefix += bucket | |
continue | |
break | |
del tree[bucket] | |
# Cleanup empty nodes after removal | |
for bucket, subtree in prev[::-1]: | |
if bucket in subtree and type(subtree[bucket]) is dict and len(subtree[bucket]) == 0: | |
del subtree[bucket] | |
return True | |
def treemap(tree, key): | |
hashed = hashname(key) | |
prefix = '' | |
while type(tree) is dict: | |
bucket = hashed[:1] | |
bktid = long(bucket, 16) | |
if bucket in tree: | |
if type(tree[bucket]) is dict: | |
if len(hashed) <= 1: | |
raise RuntimeError("Tree too deep...") | |
tree = tree[bucket] | |
hashed = hashed[1:] | |
prefix += bucket | |
continue | |
return tree[bucket] | |
ret = tree | |
while type(ret) is dict: | |
prev = None | |
cur = None | |
for key, val in ret.items(): | |
cur = val | |
if prev is None: | |
prev = key | |
if int(key, 16) > bktid: | |
ret = prev | |
ret = cur | |
return ret | |
assert False | |
def treeadd(tree, name, val=None, hashed_name=None, prefix=''): | |
assert type(tree) is dict | |
if hashed_name is None: | |
hashed_name = hashname(name) | |
bucket = hashed_name[:1] | |
bktid = int(bucket, 16) | |
if bucket not in tree: | |
if type(tree) is not dict: | |
print 'FAIL', tree | |
tree[bucket] = (name, val) | |
return (prefix + bucket, name) | |
if type(tree[bucket]) is tuple: | |
node = tree[bucket] | |
if node[0] != name: | |
A_hash = hashname(node[0])[len(prefix+bucket):][0] | |
B_hash = hashname(name)[len(prefix+bucket):][0] | |
tree[bucket] = { | |
A_hash: node, | |
B_hash: (name, val) | |
} | |
return (prefix + bucket, name) | |
return (prefix + bucket, name) | |
if len(hashed_name) <= 1: | |
raise RuntimeError("Tree too deep...") | |
return treeadd(tree[bucket], name, val, hashed_name[1:], prefix + bucket) | |
def treeiter(tree, prefix=''): | |
for key, val in tree.items(): | |
if type(val) is dict: | |
for item in treeiter(val, ''.join((prefix, key))): | |
yield item | |
else: | |
yield val | |
if __name__ == "__main__": | |
from time import time | |
now = int(time()) | |
tree = {} | |
nodes = [] | |
for node in [1,2,3,4,5,6,7,8,9]: | |
for i in range(1, 10): | |
name = "node%d-slice%d" % (now+node,i) | |
nodes.append(name) | |
treeadd(tree, name, None) | |
print treemap(tree, name) | |
assert treemap(tree, name)[0] == name | |
print "" | |
print "Tree top-level" | |
print "----------------------" | |
for X in tree.keys(): | |
print "%s - %s" % (X, tree[X][0] if type(tree[X]) is tuple else tree[X].keys()) | |
print "" | |
print "Iterate Tree" | |
print "----------------------" | |
i = 0 | |
for Y in treeiter(tree): | |
print Y | |
i += 1 | |
if i > 50: | |
break | |
print "" | |
print "Tree lookup" | |
print "----------------------" | |
ndis = {} | |
from random import getrandbits | |
for i in range(1,50000): | |
name = "derp%d" % (now%i+getrandbits(32),) | |
res = treemap(tree, name) | |
assert res is not None | |
assert res == treemap(tree, name) | |
if i < 50: | |
print "%s = %s" % (name, res) | |
node = res[0].split('-',1)[0] | |
if node not in ndis: | |
ndis[node] = 1 | |
else: | |
ndis[node] += 1 | |
print "" | |
print "Lookup Distribution" | |
print "----------------------" | |
for N, C in ndis.items(): | |
print "%s - %d" % (N, C) | |
print "" | |
print "Deletion" | |
print "----------------------" | |
i = 0 | |
oldcount = sum( 1 for _ in treeiter(tree)) | |
for Y in treeiter(tree): | |
assert treemap(tree, Y[0]) == Y | |
assert treedel(tree, Y[0]) is True | |
assert treemap(tree, Y[0]) != Y | |
treeadd(tree, Y[0]) | |
assert treedel(tree, Y[0]) is True | |
i += 1 | |
if i >= 15: | |
break | |
newcount = sum( 1 for _ in treeiter(tree)) | |
assert newcount == oldcount - 15 | |
print "OK" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment