Skip to content

Instantly share code, notes, and snippets.

@drbh
Last active April 17, 2020 19:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drbh/f349f090ee35c6e08c5d6b3a55491853 to your computer and use it in GitHub Desktop.
Save drbh/f349f090ee35c6e08c5d6b3a55491853 to your computer and use it in GitHub Desktop.
WIP implementation of typed hash tree
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"source": [
"import hashlib\n",
"import collections\n",
"import secrets\n",
"import base64 # we use base85 so our output hashes are short\n",
"\n",
"_Node = collections.namedtuple('Node', 'Type Label')\n",
"\n",
"class MyNode(_Node):\n",
"\tdef _pre_label(self):\n",
"\t\treturn ''.join([str(self.Type), str(self.Label)]).encode()\n",
"\t\t\n",
"\tdef _label(self):\n",
"\t\tprelabel = self._pre_label()\n",
"\t\treturn base64.b85encode(hashlib.sha256(prelabel).digest()).decode('utf-8')\n",
"\n",
"def _child_to_pre_label(left_node):\n",
" return ''.join(\n",
" [\n",
" str(len(left_node.Type)), left_node.Type,\n",
" str(len(str(left_node.Label))), str(left_node.Label)\n",
" ]\n",
" )\n",
"\n",
"def _build_pairs_parent(left_node, right_node):\n",
" pre_label_left = _child_to_pre_label(left_node)\n",
" pre_label_right = _child_to_pre_label(right_node)\n",
" pre_label = pre_label_left + pre_label_right\n",
"# internal_node = hashlib.sha256(pre_label.encode()).hexdigest()\t\n",
" internal_node = base64.b85encode(hashlib.sha256(pre_label.encode()).digest()).decode('utf-8')\t\n",
" return pre_label, internal_node\n",
"\n",
"def _construct_internals_from_bottom(nodes):\n",
" pre_lables = []\n",
" internal_nodes = []\n",
" it = iter(nodes)\n",
" for x in it:\n",
" left_node = x\n",
" right_node = next(it)\n",
" pre_label, internal = _build_pairs_parent(left_node, right_node)\n",
" pre_lables.append(pre_label)\n",
" internal_nodes.append(internal)\n",
" return pre_lables, internal_nodes\n",
"\n",
"def get_random_data():\n",
" return base64.b85encode(secrets.token_bytes(32)).decode('utf-8')"
],
"outputs": [],
"execution_count": 1,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.149Z",
"iopub.execute_input": "2020-04-17T19:42:12.152Z",
"iopub.status.idle": "2020-04-17T19:42:12.155Z",
"shell.execute_reply": "2020-04-17T19:42:12.117Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Build a simple tree"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"nodes = [\n",
" # typed node I - with entropy\n",
" MyNode(\"name\", \"David\"),\n",
" MyNode(\"\", get_random_data()),\n",
" \n",
" # typed node II - with entropy\n",
" MyNode(\"age\", 40),\n",
" MyNode(\"\", get_random_data()),\n",
"]\n",
"\n",
"leaves = [n._label() for n in nodes]\n",
"# build internal nodes, using left leaf type and label encoding\n",
"pre_lables, internal_nodes = _construct_internals_from_bottom(nodes)\n",
"\n",
"nds = [MyNode(\"\", x) for x in internal_nodes]\n",
"root_pre_lables, root = _construct_internals_from_bottom(nds)\n",
"root"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 2,
"data": {
"text/plain": [
"['_E!wb0!~|K;tW;?F=kx&K7#IIsGw(b-0;Iz-n#K+']"
]
},
"metadata": {}
}
],
"execution_count": 2,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.158Z",
"iopub.execute_input": "2020-04-17T19:42:12.161Z",
"iopub.status.idle": "2020-04-17T19:42:12.166Z",
"shell.execute_reply": "2020-04-17T19:42:12.119Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Show Whole Tree"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"MyNode(\"\", *root)"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 3,
"data": {
"text/plain": [
"MyNode(Type='', Label='_E!wb0!~|K;tW;?F=kx&K7#IIsGw(b-0;Iz-n#K+')"
]
},
"metadata": {}
}
],
"execution_count": 3,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.170Z",
"iopub.execute_input": "2020-04-17T19:42:12.172Z",
"iopub.status.idle": "2020-04-17T19:42:12.178Z",
"shell.execute_reply": "2020-04-17T19:42:12.122Z"
}
}
},
{
"cell_type": "code",
"source": [
"nds"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 4,
"data": {
"text/plain": [
"[MyNode(Type='', Label='Y>mU>Rx<sLNwi{rR=7Qt%_^uU)s#b$hzzanGJWK?'),\n",
" MyNode(Type='', Label='rvg@I>Io`(0_Fx5xT@dIz@>^iG2UPAbREZze&6!v')]"
]
},
"metadata": {}
}
],
"execution_count": 4,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.182Z",
"iopub.execute_input": "2020-04-17T19:42:12.185Z",
"iopub.status.idle": "2020-04-17T19:42:12.190Z",
"shell.execute_reply": "2020-04-17T19:42:12.126Z"
}
}
},
{
"cell_type": "code",
"source": [
"nodes"
],
"outputs": [
{
"output_type": "execute_result",
"execution_count": 5,
"data": {
"text/plain": [
"[MyNode(Type='name', Label='David'),\n",
" MyNode(Type='', Label='R|`EQpy3eG9u@lfd!>@loQhTi%$AsBo$LsZ2_uCd'),\n",
" MyNode(Type='age', Label=40),\n",
" MyNode(Type='', Label='^HlZ3<D7KwuYT%jx<p(I)AvgzLd-sqIMjj#QK9|)')]"
]
},
"metadata": {}
}
],
"execution_count": 5,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.196Z",
"iopub.execute_input": "2020-04-17T19:42:12.199Z",
"iopub.status.idle": "2020-04-17T19:42:12.203Z",
"shell.execute_reply": "2020-04-17T19:42:12.129Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Pretty Print Leaves and Values"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"from pptree import *\n",
"import json\n",
"\n",
"\n",
"t = Node(root[0])\n",
"\n",
"nodes_for_tree = []\n",
"for index, node in enumerate(nds):\n",
" nodes_for_tree.append(Node(json.dumps(node._asdict()), t))\n",
" \n",
"it = iter(nodes)\n",
"count = 0\n",
"for x in it:\n",
" a = x\n",
" b = next(it)\n",
" parent = nodes_for_tree[count]\n",
" Node(json.dumps(a._asdict()), parent)\n",
" Node(json.dumps(b._asdict()), parent)\n",
" count += 1"
],
"outputs": [],
"execution_count": 6,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.208Z",
"iopub.execute_input": "2020-04-17T19:42:12.211Z",
"iopub.status.idle": "2020-04-17T19:42:12.214Z",
"shell.execute_reply": "2020-04-17T19:42:12.132Z"
}
}
},
{
"cell_type": "code",
"source": [
"print_tree(t, horizontal=True)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" ┌{\"Type\": \"name\", \"Label\": \"David\"}\n",
" ┌{\"Type\": \"\", \"Label\": \"Y>mU>Rx<sLNwi{rR=7Qt%_^uU)s#b$hzzanGJWK?\"}┤\n",
" │ └{\"Type\": \"\", \"Label\": \"R|`EQpy3eG9u@lfd!>@loQhTi%$AsBo$LsZ2_uCd\"}\n",
" _E!wb0!~|K;tW;?F=kx&K7#IIsGw(b-0;Iz-n#K+┤\n",
" │ ┌{\"Type\": \"age\", \"Label\": 40}\n",
" └{\"Type\": \"\", \"Label\": \"rvg@I>Io`(0_Fx5xT@dIz@>^iG2UPAbREZze&6!v\"}┤\n",
" └{\"Type\": \"\", \"Label\": \"^HlZ3<D7KwuYT%jx<p(I)AvgzLd-sqIMjj#QK9|)\"}\n"
]
}
],
"execution_count": 7,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.219Z",
"iopub.execute_input": "2020-04-17T19:42:12.222Z",
"iopub.status.idle": "2020-04-17T19:42:12.228Z",
"shell.execute_reply": "2020-04-17T19:42:12.136Z"
}
}
},
{
"cell_type": "markdown",
"source": [
"### Show internal nodes pre-labels"
],
"metadata": {
"nteract": {
"transient": {
"deleting": false
}
}
}
},
{
"cell_type": "code",
"source": [
"t = Node(root_pre_lables[0])\n",
"\n",
"nodes_for_tree = []\n",
"for index, node in enumerate(pre_lables):\n",
" nodes_for_tree.append(Node(node, t))"
],
"outputs": [],
"execution_count": 8,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.233Z",
"iopub.execute_input": "2020-04-17T19:42:12.235Z",
"iopub.status.idle": "2020-04-17T19:42:12.238Z",
"shell.execute_reply": "2020-04-17T19:42:12.140Z"
}
}
},
{
"cell_type": "code",
"source": [
"print_tree(t, horizontal=True)"
],
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" ┌4name5David040R|`EQpy3eG9u@lfd!>@loQhTi%$AsBo$LsZ2_uCd\n",
" 040Y>mU>Rx<sLNwi{rR=7Qt%_^uU)s#b$hzzanGJWK?040rvg@I>Io`(0_Fx5xT@dIz@>^iG2UPAbREZze&6!v┤\n",
" └3age240040^HlZ3<D7KwuYT%jx<p(I)AvgzLd-sqIMjj#QK9|)\n"
]
}
],
"execution_count": 9,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
},
"execution": {
"iopub.status.busy": "2020-04-17T19:42:12.243Z",
"iopub.execute_input": "2020-04-17T19:42:12.246Z",
"iopub.status.idle": "2020-04-17T19:42:12.252Z",
"shell.execute_reply": "2020-04-17T19:42:12.269Z"
}
}
},
{
"cell_type": "code",
"source": [],
"outputs": [],
"execution_count": null,
"metadata": {
"collapsed": true,
"outputExpanded": false,
"jupyter": {
"source_hidden": false,
"outputs_hidden": false
},
"nteract": {
"transient": {
"deleting": false
}
}
}
}
],
"metadata": {
"kernel_info": {
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.7.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"kernelspec": {
"argv": [
"/Applications/Xcode.app/Contents/Developer/usr/bin/python3",
"-m",
"ipykernel_launcher",
"-f",
"{connection_file}"
],
"display_name": "Python 3",
"language": "python",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment