Skip to content

Instantly share code, notes, and snippets.

@yongkangc
Last active August 22, 2022 10:04
Show Gist options
  • Save yongkangc/22c2489d99d7cc2836be77380422abbe to your computer and use it in GitHub Desktop.
Save yongkangc/22c2489d99d7cc2836be77380422abbe to your computer and use it in GitHub Desktop.
Progression of ZK
import hashlib
from math import log2, ceil
def hash_string(s):
return hashlib.sha256(s.encode()).hexdigest()
class MerkleTree:
"""
A naive Merkle tree implementation using SHA256
"""
def __init__(self, data):
self.data = data
next_pow_of_2 = int(2**ceil(log2(len(data))))
self.data.extend([0] * (next_pow_of_2 - len(data)))
self.tree = ["" for x in self.data] + \
[hash_string(str(x)) for x in self.data]
for i in range(len(self.data) - 1, 0, -1):
self.tree[i] = hash_string(self.tree[i * 2] + self.tree[i * 2 + 1])
def get_root(self):
return self.tree[1]
def get_val_and_path(self, id):
val = self.data[id]
auth_path = []
id = id + len(self.data)
while id > 1:
auth_path += [self.tree[id ^ 1]]
id = id // 2
return val, auth_path
def verify_merkle_path(root, data_size, value_id, value, path):
cur = hash_string(str(value))
tree_node_id = value_id + int(2**ceil(log2(data_size)))
for sibling in path:
assert tree_node_id > 1
if tree_node_id % 2 == 0:
cur = hash_string(cur + sibling)
else:
cur = hash_string(sibling + cur)
tree_node_id = tree_node_id // 2
assert tree_node_id == 1
return root == cur
import random
def get_witness(problem, assignment):
"""
Given an instance of a partition problem via a list of numbers (the problem) and a list of
(-1, 1), we say that the assignment satisfies the problem if their dot product is 0.
The protocol we developed so far was this:
The verifier chooses a random value i.
According to the value of i, the verifier asks for two values from the witness (usually two consecutive values,
but sometimes the first and the last).
"""
sum = 0
mx = 0
side_obfuscator = 1 - 2 * random.randint(0, 1)
witness = [sum]
assert len(problem) == len(assignment)
for num, side in zip(problem, assignment):
assert side == 1 or side == -1
sum += side * num * side_obfuscator
witness += [sum]
mx = max(mx, num)
# make sure that it is a satisfying assignment
assert sum == 0
shift = random.randint(0, mx)
witness = [x + shift for x in witness]
return witness
class ZkMerkleTree:
"""
A Zero Knowledge Merkle tree implementation using SHA256
"""
def __init__(self, data):
self.data = data
next_pow_of_2 = int(2**ceil(log2(len(data))))
self.data.extend([0] * (next_pow_of_2 - len(data)))
# Intertwine with randomness to obtain zero knowledge.
rand_list = [random.randint(0, 1 << 32) for x in self.data]
self.data = [x for tup in zip(self.data, rand_list) for x in tup]
# Create bottom level of the tree (i.e. leaves).
self.tree = ["" for x in self.data] + \
[hash_string(str(x)) for x in self.data]
for i in range(len(self.data) - 1, 0, -1):
self.tree[i] = hash_string(self.tree[i * 2] + self.tree[i * 2 + 1])
def get_root(self):
return self.tree[1]
def get_val_and_path(self, id):
# Because of the zk padding, the data is now at id * 2
id = id * 2
val = self.data[id]
auth_path = []
id = id + len(self.data)
while id > 1:
auth_path += [self.tree[id ^ 1]]
id = id // 2
return val, auth_path
def verify_zk_merkle_path(root, data_size, value_id, value, path):
cur = hash_string(str(value))
# Due to zk padding, data_size needs to be multiplied by 2, as does the value_id
tree_node_id = value_id * 2 + int(2**ceil(log2(data_size * 2)))
for sibling in path:
assert tree_node_id > 1
if tree_node_id % 2 == 0:
cur = hash_string(cur + sibling)
else:
cur = hash_string(sibling + cur)
tree_node_id = tree_node_id // 2
assert tree_node_id == 1
return root == cur
# This has the desired effect because whenever the prover has to reveal an authentication path for a piece of data - all the hashes revealed are affected by random data, and having been mixed through the Sha256 hash - these hashes appear random, and provide zero knowledge (other than the revelaed leaf node).
@yongkangc
Copy link
Author

As it turns out, this is very easy to fix by adding another level to the tree, such that in it, every pair of siblings is comprised of one node associated with actual data, and the other with a random string.
This is again the notion of mixing real data with randomness in order to obtain zero knowledge.
image

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment