Skip to content

Instantly share code, notes, and snippets.

@qstokkink
Created April 6, 2022 08:09
Show Gist options
  • Save qstokkink/fecfe070d801538834ff90aa45ddb130 to your computer and use it in GitHub Desktop.
Save qstokkink/fecfe070d801538834ff90aa45ddb130 to your computer and use it in GitHub Desktop.
"Timely Sharing with Reputation Prototype" reference implementation
"""
#!/usr/bin/python3
Check if nodes "must store" data using reputation.
Example use of these functions is given at line 169.
"""
from math import log
from typing import Dict
import hashlib
import os
import random
OSTRACIZATION: Dict[bytes, float] = {}
REPUTATIONS: Dict[bytes, float] = {}
"""
In this example file we use a global dictionary for reputations.
If a node is not run in separate process, this needs to be replaced with local reputation estimates per node.
"""
def get_reputation(nodeid: bytes) -> float:
"""
Get the reputation for a node id. Not used in experiments, but included for demo functionality.
"""
return REPUTATIONS.get(nodeid, 0.0) * OSTRACIZATION.get(nodeid, 1.0)
def set_reputation(nodeid: bytes, reputation: float) -> None:
"""
Set the reputation for a node id. Not used in experiments, but included for demo functionality.
"""
REPUTATIONS[nodeid] = reputation
def ostracize(nodeid: bytes):
"""
Temporarily set the reputation for a node id to 0. Not used in experiments, but included for demo functionality.
"""
OSTRACIZATION[nodeid] = 0.0
def unostracize(nodeid: bytes):
"""
Undo ostracization for a node id. Not used in experiments, but included for demo functionality.
"""
OSTRACIZATION.pop(nodeid)
def get_Q(n: int) -> int:
"""
Calculate a nice value for the hash function output length Q from the network size.
:param n: the estimated network size
"""
return max(0, int(log(n, 2) - 1))
def prng(data: bytes, nodeid: bytes, Q: int) -> bytes:
"""
Seed a PRNG with a node id and data to produce a pseudorandom value of Q bits.
:param data: the data to seed with
:param nodeid: the node id to seed with
:param Q: the hash function output length
"""
random.seed(nodeid + data)
ia = [random.randint(0, 255) for _ in range(int(Q/8) + 1)]
if (Q % 8) != 0:
masksize = Q % 8
mask = 0
for _ in range(masksize):
mask = (mask << 1) | 1
ia[0] = ia[0] & mask
else:
ia = ia[1:] # One byte too many
return bytes(ia)
def get_H(data: bytes, Q: int) -> bytes:
"""
Calculate a Q bits hash value of data.
:param data: the data to hash
:param Q: the number of bits the hash value should be
"""
rH = hashlib.sha256(data).digest()
ia = [rH[x % len(rH)] for x in range(int(Q/8) + 1)]
if (Q % 8) != 0:
masksize = Q % 8
mask = 0
for _ in range(masksize):
mask = (mask << 1) | 1
ia[0] = ia[0] & mask
else:
ia = ia[1:] # One byte too many
return bytes(ia)
def ihamming_distance(i1: int, i2: int) -> int:
"""
Calculate the Hamming distance between two integers.
:param i1: the first integer
:param i2: the second integer
"""
diffbits = i1 ^ i2
count = 0
for _ in range(diffbits.bit_length()):
if (diffbits & 1) == 1:
count += 1
diffbits = diffbits >> 1
return count
def hamming_distance(chaine1: bytes, chaine2: bytes) -> int:
"""
Calculate the Hamming distance between two bytes.
:param chaine1: the first chain of bytes
:param chaine2: the second chain of btyes
"""
return sum(ihamming_distance(c1, c2) for c1, c2 in zip(chaine1, chaine2))
def binding_score(data: bytes, nodeid: bytes, Q: int) -> int:
"""
Calculate the binding score between data and a node id, using a given value for Q.
:param data: the data to calculate for
:param nodeid: the node id to calculate for
:param Q: the internal value for the number of bits the hash value should be
"""
H = get_H(nodeid, Q)
r = prng(data, nodeid, Q)
return hamming_distance(H, r)
def mapping_func(d: float, Q: int, D: int) -> float:
"""
Calculate the mapping Z on [0.0, 1.0] from a given distance (on [0.0, Q]), Q, and D.
:param d: the Hamming distance over Q
:param Q: the internal value for the number of bits the hash value should be
:param D: the minimum replication factor
"""
return d*float(Q - D)
def should_store(data: bytes, nodeid: bytes,
f_reputation=get_reputation, n: int = 0, D: int = 0, Q: int = None) -> bool:
"""
Given data and a node id, determine if the node should store the data.
:param data: the data to calculate for
:param nodeid: the node id to calculate for
:param n: the estimated network size (used if Q is None)
:param D: the minimum replication factor
:param Q: a fixed value for Q (give the estimated network size n if None)
:param f_reputation: function that returns the reputation (float) of a node id (int)
:return:
"""
Q = Q if Q is not None else get_Q(n)
return binding_score(data, nodeid, Q) >= mapping_func(f_reputation(nodeid), Q, D)
if __name__ == "__main__":
# Example use (run this file for a demo).
# We use D=0 and set all reputations to 1 to mimic worst-case replication.
# The average copies should *roughly* be 2 (this is probabilistic and may be slightly higher or lower).
repetitions = 100
network_size = 128
replication_sum = 0
for _ in range(repetitions):
copies = 0
data = os.urandom(20) # Mimics application data
nodeids = [os.urandom(64) for _ in range(network_size)] # Mimics public keys of 64 bytes
Q = get_Q(network_size)
# We naively assume full network connectivity for this demo. This is normally hooked into a network overlay.
for i in range(network_size):
set_reputation(nodeids[i], 1.0)
if should_store(data, nodeids[i], n=network_size, D=0, Q=Q):
copies += 1
replication_sum += copies
print(f"Average replicas {replication_sum / float(repetitions)} stored on {network_size} available nodes!")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment