Skip to content

Instantly share code, notes, and snippets.

@cculianu
Last active December 16, 2020 21:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cculianu/8e97702e7a4e21d425415b3098799e2a to your computer and use it in GitHub Desktop.
Save cculianu/8e97702e7a4e21d425415b3098799e2a to your computer and use it in GitHub Desktop.
Benchmark recursive hashing vs current E-X approach
#!/usr/bin/env python3
import hashlib
import random
import time
from typing import List
def make_random_history(count: int) -> List[bytes]:
ret = []
for _ in range(count):
rand_hash_hex = bytes(random.getrandbits(8) for _ in range(32)).hex()
rand_height = random.randint(0, 600_000)
ret.append(f"{rand_hash_hex}:{rand_height}".encode('ascii'))
ret = sorted(ret, key=lambda x: int(x.split(b':')[-1]))
return ret
def sha256d(b: bytes) -> bytes:
return hashlib.sha256(hashlib.sha256(b).digest()).digest()
def recursive_hash(items: List[bytes], chunk_size: int = 1) -> bytes:
status = b'00' * 32
for i in range(0, len(items), chunk_size):
status = sha256d(status.hex().encode('ascii') + b':' + b':'.join(items[i:i+chunk_size]))
return status
def main():
n = 1_000_000
print(f"Generating {n} random sorted history items ...")
t0 = time.time()
items = make_random_history(n)
print(f"Elapsed: {(time.time() - t0) * 1e3:1.3f} msec")
print('-' * 80)
# Bench hashing 1 million items the current E-X way
print(f"Hashing all {n} items once (current E-X approach) ...")
t0 = time.time()
bstr = b':'.join(items);
t1 = time.time()
fake_status = sha256d(bstr).hex()
print(f"Result: {fake_status}")
print(f"Elapsed: {(time.time()-t0)*1e3:1.3f} msec (of which {(t1-t0)*1e3:1.3f} msec was spent concatenating the "
f"byte string)")
print('-' * 80)
# Bench hashing 1 million items the proposed 'recursive' way, using various "chunk" sizes
for chunk_size in (1, 10, 100, 1_000, 10_000):
print(f"Hashing all {n} items recursively, using \"chunk size\" of {chunk_size} ...")
t0 = time.time()
fake_status = recursive_hash(items, chunk_size).hex()
print(f"Result: {fake_status}")
print(f"Elapsed: {(time.time()-t0)*1e3:1.3f} msec")
print('-' * 80)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment