Skip to content

Instantly share code, notes, and snippets.

@shanecandoit
Last active March 3, 2023 23:52
Show Gist options
  • Save shanecandoit/7ed9cc88936a9088d98deca278ff2542 to your computer and use it in GitHub Desktop.
Save shanecandoit/7ed9cc88936a9088d98deca278ff2542 to your computer and use it in GitHub Desktop.
python_list_tokens_hashing
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import hashlib # use sha256"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"`tokenize` turns a source string (for example, the contents of a file) into a list of tokens.\n",
"The result is a flat list with no nesting depth."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def tokenize(source: str) -> list:\n",
"    \"\"\"Split source text into a flat list of token strings.\n",
"\n",
"    Every bracket is padded with spaces so that it becomes its own token;\n",
"    '[' and ']' are rewritten as '(' and ')'. The padded text is then split\n",
"    on whitespace.\n",
"    \"\"\"\n",
"    padded = source\n",
"    # same substitutions, in the same order, as a chain of .replace() calls\n",
"    for bracket, spaced_paren in (('(', ' ( '), (')', ' ) '), ('[', ' ( '), (']', ' ) ')):\n",
"        padded = padded.replace(bracket, spaced_paren)\n",
"    return padded.split()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"An example: tokenizing a function definition, which is a nested expression (the resulting token list is still flat).\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['(', 'define', '(', 'square', 'x', ')', '(', '*', 'x', 'x', ')', ')']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# the bare name on the last line makes the notebook display the token list\n",
"r = tokenize('(define (square x) (* x x))')\n",
"r"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"A simple expression with no nesting; the square brackets are tokenized as parentheses.\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['(', '+', '1', '2', '3', ')']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# `t` is kept as a notebook-level name: a later cell hashes each of its tokens\n",
"t = tokenize('[+ 1 2 3]')\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def sha256(s: str) -> str:\n",
"    \"\"\"Return the SHA256 digest of a string as hexadecimal text.\n",
"\n",
"    Args:\n",
"        s: text to hash; it is encoded as UTF-8 before hashing.\n",
"\n",
"    Returns:\n",
"        The hexadecimal digest as a str (hexdigest() returns text, not bytes).\n",
"    \"\"\"\n",
"    # hashlib is imported once in the notebook's first cell\n",
"    return hashlib.sha256(s.encode('utf-8')).hexdigest()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['32ebb1abcc1c601ceb9c4e3c4faba0caa5b85bb98c4f1e6612c40faa528a91c9',\n",
" 'a318c24216defe206feeb73ef5be00033fa9c4a74d0b967f6532a26ca5906d3b',\n",
" '6b86b273ff34fce19d6b804eff5a3f5747ada4eaa22f1d49c01e52ddb7875b4b',\n",
" 'd4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35',\n",
" '4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce',\n",
" 'ba5ec51d07a4ac0e951608704431d59a02b21a4e951acc10505a8dc407c501ee']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# hash every token of `t` individually, keeping the token order\n",
"t_hash = list(map(sha256, t))\n",
"t_hash"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"hash_map: {'*': '684888c0ebb17f374298b65ee2807526c066094c701bcc7ebbe1c1095f494fc1', '2': 'd4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35', '3': '4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce'}\n",
"hashes: ['684888c0ebb17f374298b65ee2807526c066094c701bcc7ebbe1c1095f494fc1', 'd4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35', '4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce']\n",
"hash_of_map: 137a5e74a65d4a4306bd5474ba4e68269cdac3547a1647f085e3d784c436dfdb\n",
"hash_chain: [('4e07408562bedb8b60ce05c1decfe3ad16b72230967de01f640b7e4729b49fce', 'd4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35', 'ef3b42c1086b0493172457c03f4ddf2dcd0e69a9009e131e90196ee06cbc44be'), ('d4735e3a265e16eee03f59718b9b5d03019c07d8b6c51f90da3a666eec13ab35', '684888c0ebb17f374298b65ee2807526c066094c701bcc7ebbe1c1095f494fc1', 'a93e57189011b4a14fcd8f6b08825e8ddcf95d87c8d5cc3bda768b7b91fa040f')]\n",
"a93e57189011b4a14fcd8f6b08825e8ddcf95d87c8d5cc3bda768b7b91fa040f\n"
]
}
],
"source": [
"# make a hash of a list\n",
"def hash_of_list(tokens: list, verbose: bool = True) -> str:\n",
"    \"\"\"Return a SHA256 hex digest that depends on every token in the list.\n",
"\n",
"    One leading '(' and one trailing ')' are dropped if present. Each\n",
"    remaining token is hashed with sha256(), and the hashes are folded from\n",
"    the last token to the first: the running digest starts as the hash of\n",
"    the last token and is replaced at each step by\n",
"    sha256(running + ' ' + hash_of_previous_token).\n",
"\n",
"    Args:\n",
"        tokens: flat list of token strings, e.g. the output of tokenize().\n",
"        verbose: when True (the default) print the intermediate hashes.\n",
"\n",
"    Returns:\n",
"        The final digest of the chain. A single token yields that token's\n",
"        own hash; no tokens at all yields sha256('').\n",
"    \"\"\"\n",
"    # Both ends are checked against the list as passed in, so a ')' that\n",
"    # closes an inner expression is never stripped by mistake.\n",
"    start = 1 if tokens and tokens[0] == '(' else 0\n",
"    stop = len(tokens)\n",
"    if stop > start and tokens[-1] == ')':\n",
"        stop -= 1\n",
"    tokens = tokens[start:stop]\n",
"\n",
"    hashes = [sha256(token) for token in tokens]\n",
"    if verbose:\n",
"        hash_map = dict(zip(tokens, hashes))\n",
"        print(f'hash_map: {hash_map}')\n",
"        print(f'hashes: {hashes}')\n",
"        hash_of_map = sha256(str(hash_map))\n",
"        print(f'hash_of_map: {hash_of_map}')\n",
"\n",
"    hash_chain = []\n",
"    # an empty token list falls back to the hash of the empty string\n",
"    last_hash = hashes[-1] if hashes else sha256('')\n",
"    for next_hash in reversed(hashes[:-1]):\n",
"        combine = sha256(last_hash + ' ' + next_hash)\n",
"        hash_chain.append((last_hash, next_hash, combine))\n",
"        # carry the combined digest forward so every token affects the result\n",
"        last_hash = combine\n",
"    if verbose:\n",
"        print(f'hash_chain: {hash_chain}')\n",
"    return last_hash\n",
"\n",
"# demo: tokenize a small expression, hash its token list, and print the digest\n",
"print(hash_of_list(tokenize('(* 2 3)')))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "40d3a090f54c6569ab1632332b64b2c03c39dcf918b08424e98f38b5ae0af88f"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment