Last active
April 17, 2020 19:44
-
-
Save drbh/f349f090ee35c6e08c5d6b3a55491853 to your computer and use it in GitHub Desktop.
WIP implementation of typed hash tree
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"source": [ | |
"import hashlib\n", | |
"import collections\n", | |
"import secrets\n", | |
"import base64 # we use base85 so our output hashes are short\n", | |
"\n", | |
"_Node = collections.namedtuple('Node', 'Type Label')\n", | |
"\n", | |
"class MyNode(_Node):\n", | |
"\tdef _pre_label(self):\n", | |
"\t\treturn ''.join([str(self.Type), str(self.Label)]).encode()\n", | |
"\t\t\n", | |
"\tdef _label(self):\n", | |
"\t\tprelabel = self._pre_label()\n", | |
"\t\treturn base64.b85encode(hashlib.sha256(prelabel).digest()).decode('utf-8')\n", | |
"\n", | |
"def _child_to_pre_label(left_node):\n", | |
" return ''.join(\n", | |
" [\n", | |
" str(len(left_node.Type)), left_node.Type,\n", | |
" str(len(str(left_node.Label))), str(left_node.Label)\n", | |
" ]\n", | |
" )\n", | |
"\n", | |
"def _build_pairs_parent(left_node, right_node):\n", | |
" pre_label_left = _child_to_pre_label(left_node)\n", | |
" pre_label_right = _child_to_pre_label(right_node)\n", | |
" pre_label = pre_label_left + pre_label_right\n", | |
"# internal_node = hashlib.sha256(pre_label.encode()).hexdigest()\t\n", | |
" internal_node = base64.b85encode(hashlib.sha256(pre_label.encode()).digest()).decode('utf-8')\t\n", | |
" return pre_label, internal_node\n", | |
"\n", | |
"def _construct_internals_from_bottom(nodes):\n", | |
" pre_lables = []\n", | |
" internal_nodes = []\n", | |
" it = iter(nodes)\n", | |
" for x in it:\n", | |
" left_node = x\n", | |
" right_node = next(it)\n", | |
" pre_label, internal = _build_pairs_parent(left_node, right_node)\n", | |
" pre_lables.append(pre_label)\n", | |
" internal_nodes.append(internal)\n", | |
" return pre_lables, internal_nodes\n", | |
"\n", | |
"def get_random_data():\n", | |
" return base64.b85encode(secrets.token_bytes(32)).decode('utf-8')" | |
], | |
"outputs": [], | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.149Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.152Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.155Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.117Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Build a simple tree" | |
], | |
"metadata": { | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"nodes = [\n", | |
" # typed node I - with entropy\n", | |
" MyNode(\"name\", \"David\"),\n", | |
" MyNode(\"\", get_random_data()),\n", | |
" \n", | |
" # typed node II - with entropy\n", | |
" MyNode(\"age\", 40),\n", | |
" MyNode(\"\", get_random_data()),\n", | |
"]\n", | |
"\n", | |
"leaves = [n._label() for n in nodes]\n", | |
"# build internal nodes, using left leaf type and label encoding\n", | |
"pre_lables, internal_nodes = _construct_internals_from_bottom(nodes)\n", | |
"\n", | |
"nds = [MyNode(\"\", x) for x in internal_nodes]\n", | |
"root_pre_lables, root = _construct_internals_from_bottom(nds)\n", | |
"root" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 2, | |
"data": { | |
"text/plain": [ | |
"['_E!wb0!~|K;tW;?F=kx&K7#IIsGw(b-0;Iz-n#K+']" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.158Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.161Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.166Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.119Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Show Whole Tree" | |
], | |
"metadata": { | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"MyNode(\"\", *root)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 3, | |
"data": { | |
"text/plain": [ | |
"MyNode(Type='', Label='_E!wb0!~|K;tW;?F=kx&K7#IIsGw(b-0;Iz-n#K+')" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.170Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.172Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.178Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.122Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"nds" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 4, | |
"data": { | |
"text/plain": [ | |
"[MyNode(Type='', Label='Y>mU>Rx<sLNwi{rR=7Qt%_^uU)s#b$hzzanGJWK?'),\n", | |
" MyNode(Type='', Label='rvg@I>Io`(0_Fx5xT@dIz@>^iG2UPAbREZze&6!v')]" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"execution_count": 4, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.182Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.185Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.190Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.126Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"nodes" | |
], | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"execution_count": 5, | |
"data": { | |
"text/plain": [ | |
"[MyNode(Type='name', Label='David'),\n", | |
" MyNode(Type='', Label='R|`EQpy3eG9u@lfd!>@loQhTi%$AsBo$LsZ2_uCd'),\n", | |
" MyNode(Type='age', Label=40),\n", | |
" MyNode(Type='', Label='^HlZ3<D7KwuYT%jx<p(I)AvgzLd-sqIMjj#QK9|)')]" | |
] | |
}, | |
"metadata": {} | |
} | |
], | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.196Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.199Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.203Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.129Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Pretty Print Leaves and Values" | |
], | |
"metadata": { | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"from pptree import *\n", | |
"import json\n", | |
"\n", | |
"\n", | |
"t = Node(root[0])\n", | |
"\n", | |
"nodes_for_tree = []\n", | |
"for index, node in enumerate(nds):\n", | |
" nodes_for_tree.append(Node(json.dumps(node._asdict()), t))\n", | |
" \n", | |
"it = iter(nodes)\n", | |
"count = 0\n", | |
"for x in it:\n", | |
" a = x\n", | |
" b = next(it)\n", | |
" parent = nodes_for_tree[count]\n", | |
" Node(json.dumps(a._asdict()), parent)\n", | |
" Node(json.dumps(b._asdict()), parent)\n", | |
" count += 1" | |
], | |
"outputs": [], | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.208Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.211Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.214Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.132Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"print_tree(t, horizontal=True)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
" ┌{\"Type\": \"name\", \"Label\": \"David\"}\n", | |
" ┌{\"Type\": \"\", \"Label\": \"Y>mU>Rx<sLNwi{rR=7Qt%_^uU)s#b$hzzanGJWK?\"}┤\n", | |
" │ └{\"Type\": \"\", \"Label\": \"R|`EQpy3eG9u@lfd!>@loQhTi%$AsBo$LsZ2_uCd\"}\n", | |
" _E!wb0!~|K;tW;?F=kx&K7#IIsGw(b-0;Iz-n#K+┤\n", | |
" │ ┌{\"Type\": \"age\", \"Label\": 40}\n", | |
" └{\"Type\": \"\", \"Label\": \"rvg@I>Io`(0_Fx5xT@dIz@>^iG2UPAbREZze&6!v\"}┤\n", | |
" └{\"Type\": \"\", \"Label\": \"^HlZ3<D7KwuYT%jx<p(I)AvgzLd-sqIMjj#QK9|)\"}\n" | |
] | |
} | |
], | |
"execution_count": 7, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.219Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.222Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.228Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.136Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"### Show internal nodes pre-labels" | |
], | |
"metadata": { | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"t = Node(root_pre_lables[0])\n", | |
"\n", | |
"nodes_for_tree = []\n", | |
"for index, node in enumerate(pre_lables):\n", | |
" nodes_for_tree.append(Node(node, t))" | |
], | |
"outputs": [], | |
"execution_count": 8, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.233Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.235Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.238Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.140Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"print_tree(t, horizontal=True)" | |
], | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
" ┌4name5David040R|`EQpy3eG9u@lfd!>@loQhTi%$AsBo$LsZ2_uCd\n", | |
" 040Y>mU>Rx<sLNwi{rR=7Qt%_^uU)s#b$hzzanGJWK?040rvg@I>Io`(0_Fx5xT@dIz@>^iG2UPAbREZze&6!v┤\n", | |
" └3age240040^HlZ3<D7KwuYT%jx<p(I)AvgzLd-sqIMjj#QK9|)\n" | |
] | |
} | |
], | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
}, | |
"execution": { | |
"iopub.status.busy": "2020-04-17T19:42:12.243Z", | |
"iopub.execute_input": "2020-04-17T19:42:12.246Z", | |
"iopub.status.idle": "2020-04-17T19:42:12.252Z", | |
"shell.execute_reply": "2020-04-17T19:42:12.269Z" | |
} | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [], | |
"outputs": [], | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true, | |
"outputExpanded": false, | |
"jupyter": { | |
"source_hidden": false, | |
"outputs_hidden": false | |
}, | |
"nteract": { | |
"transient": { | |
"deleting": false | |
} | |
} | |
} | |
} | |
], | |
"metadata": { | |
"kernel_info": { | |
"name": "python3" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.7.3", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
}, | |
"kernelspec": { | |
"argv": [ | |
"/Applications/Xcode.app/Contents/Developer/usr/bin/python3", | |
"-m", | |
"ipykernel_launcher", | |
"-f", | |
"{connection_file}" | |
], | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment