Skip to content

Instantly share code, notes, and snippets.

@DavidBuchanan314
Last active December 27, 2023 18:09
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save DavidBuchanan314/e2d84c50cbd8e7c86eaa25f0c5b29a5c to your computer and use it in GitHub Desktop.
Save DavidBuchanan314/e2d84c50cbd8e7c86eaa25f0c5b29a5c to your computer and use it in GitHub Desktop.
I wrote about this code in more detail here: https://www.da.vidbuchanan.co.uk/blog/signing-json.html
"""
DISCLAIMER: This is a quick prototype; it's not at all tested and may be deeply cryptographically flawed.
Normally, JSON canonicalization is at least O(n log n), because you need to sort the map keys.
This approach avoids the need to do that. In theory it's O(n), but in practice it's probably slower for most inputs (I have not benchmarked).
If you limit recursion depth, you could implement it as an Online Algorithm https://en.wikipedia.org/wiki/Online_algorithm
NB: Python's JSON parser allows duplicate map keys by default, which hash_map() on its own would be oblivious to.
(Edit: hash_json() now uses object_pairs_hook to forbid duplicate keys.)
hash_number() is probably a bit half-baked too.
"""
import json
import struct
import hashlib
from typing import Dict, Any
import nacl.bindings.crypto_core # install via: python3 -m pip install git+https://github.com/pyca/pynacl
# Single alias for the digest constructor (SHA-256) used by every hash_* helper in this file.
hash256 = hashlib.sha256
def hash_str(value: str) -> bytes:
    """Return the digest of a string, prefixed with the type tag ``b"s"``.

    The text is encoded as UTF-8 and the tag byte is prepended before
    hashing with ``hash256``.
    """
    tagged = b"s" + value.encode("utf-8")
    return hash256(tagged).digest()
def hash_number(value: int | float) -> bytes:
return hash256(b"n" + struct.pack("<d", value)).digest()
def hash_value(value: Any) -> bytes:
    """Hash a parsed JSON value by dispatching on its exact Python type.

    Strings, numbers, lists and dicts are delegated to ``hash_str``,
    ``hash_number``, ``hash_array`` and ``hash_map``. Booleans and ``None``
    are hashed here as the single tag bytes ``t``/``f`` and ``u``.

    Raises:
        TypeError: if ``value`` is not one of the supported types.
    """
    # Exact type comparison rather than isinstance(): bool is a subclass of
    # int, and True/False must get their own tags instead of hashing as 1/0.
    t = type(value)
    if t is str:
        return hash_str(value)
    if t in (int, float):
        return hash_number(value)
    if t is list:
        return hash_array(value)
    if t is dict:
        return hash_map(value)
    if t is bool:
        return hash256(b"t" if value else b"f").digest()
    if value is None:
        return hash256(b"u").digest()  # u for undefined
    # TypeError is still caught by callers that catch Exception.
    raise TypeError(f"dunno how to hash a {t}")
def hash_array(value: list) -> bytes:
    """Return the digest of a list, prefixed with the type tag ``b"a"``.

    Each element is hashed with ``hash_value`` and the element digests are
    fed to the hasher in list order, so element order affects the result.
    """
    element_digests = b"".join(hash_value(item) for item in value)
    return hash256(b"a" + element_digests).digest()
def hash_map(value: Dict[str, Any]) -> bytes:
    """Return an order-independent digest of a map, tagged with ``b"m"``.

    Every (key, value) pair is hashed to 32 bytes, mapped to an Ed25519 point
    and added into a running sum. Iteration order therefore does not change
    the final accumulator, so no key sorting is needed.
    """
    # Starting accumulator: the encoded neutral element (y = 1), so adding the
    # first entry's point yields that point.
    total = b'\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00'
    for key, item in value.items():
        # Fixed-length key digest followed by value digest, hashed once more
        # to get the 32 uniform bytes that are mapped onto the curve.
        entry_digest = hash256(hash_str(key) + hash_value(item)).digest()
        entry_point = nacl.bindings.crypto_core.crypto_core_ed25519_from_uniform(entry_digest)
        total = nacl.bindings.crypto_core.crypto_core_ed25519_add(total, entry_point)
    return hash256(b"m" + total).digest()
def ensure_no_duplicate_keys(object_pairs: list[tuple[str, Any]]) -> dict:
    """Build a dict from decoded JSON object pairs, rejecting repeated keys.

    Intended as an ``object_pairs_hook`` for ``json.loads``, which passes each
    decoded object as a list of ``(key, value)`` pairs in document order.

    Raises:
        ValueError: if the same key appears more than once.
    """
    value = dict(object_pairs)
    # dict() keeps only one entry per key, so a size mismatch means at least
    # one key was repeated in the pair list.
    if len(value) != len(object_pairs):
        raise ValueError("Duplicate JSON map keys")
    return value
def hash_json(data: str) -> bytes:
    """Parse a JSON document and return its order-independent digest.

    Parsing is stricter than the ``json.loads`` defaults: duplicate object
    keys are rejected via ``ensure_no_duplicate_keys``, and the non-standard
    tokens ``NaN``, ``Infinity`` and ``-Infinity`` are rejected as well.

    Raises:
        ValueError: on malformed JSON, duplicate object keys, or one of the
            non-standard constant tokens.
    """

    def reject_constant(token: str) -> None:
        # json.loads calls this hook only for 'NaN', 'Infinity' and
        # '-Infinity', none of which are valid JSON numbers.
        raise ValueError(f"Invalid JSON constant: {token}")

    parsed = json.loads(
        data,
        object_pairs_hook=ensure_no_duplicate_keys,
        parse_constant=reject_constant,
    )
    return hash_value(parsed)
if __name__ == "__main__":
    # Demo: print the digest of a small document as hex.
    print(hash_json('{"hello": "world"}').hex())

    # Self-checks: key order and insignificant whitespace must not change
    # the digest of a map.
    two_key_docs = (
        '{"a": "a", "b": "b"}',
        '{ "b": "b", "a": "a"}',
    )
    assert hash_json(two_key_docs[0]) == hash_json(two_key_docs[1])

    three_key_docs = (
        '{"b": "b", "a": "a", "c": "c"}',
        '{"c": "c", "b": "b", "a": "a"}',
        '{"a" : "a", "b": "b", "c": "c"}',
    )
    # All three orderings must collapse to a single distinct digest.
    assert len({hash_json(doc) for doc in three_key_docs}) == 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment