Skip to content

Instantly share code, notes, and snippets.

@wootfish
Last active June 15, 2020 22:36
Show Gist options
  • Save wootfish/eb6415002a399a961a8f49b5dd0c871e to your computer and use it in GitHub Desktop.
Save wootfish/eb6415002a399a961a8f49b5dd0c871e to your computer and use it in GitHub Desktop.
Measuring the resiliences of DHT networks
from functools import wraps
def _read_memo(fname="memo"):
with open(fname, "r") as f:
memo = eval(f.read())
return memo
def _write_memo(memo, fname="memo"):
with open(fname, "w") as f:
f.write(repr(memo))
def cached_memoize(func):
memo = _read_memo()
@wraps(func)
def wrapper(*args):
if args not in memo:
res = func(*args)
memo[args] = res
_write_memo(memo)
return memo[args]
return wrapper
from multiprocessing import Process, Event
import os
import random
import datetime
import itertools
from prefixtree import PrefixTreeNode as PrefixTree
n = 15000
L = 32
k = 16
def make_addr_set(size):
addrs = set(random.randrange(2**L) for _ in range(size))
while len(addrs) < size:
# in case the initial comprehension hit any collisions
addrs |= set(random.randrange(2**L) for _ in range(size - len(addrs)))
return addrs
def run_test(n, m, samples=20000):
defenders = make_addr_set(n)
attackers = make_addr_set(m)
# note that there is the possibility of overlap between defenders and
# attackers - this is ok, the model accounts for it
tree = PrefixTree(L, 50)
for addr in defenders | attackers:
tree.insert(addr)
resilient = 0
compromised = 0
for _ in range(samples):
addr = random.randrange(2**L)
lookup_set = tree.query(addr, k)
if any(addr in defenders for addr in lookup_set):
resilient += 1
else:
compromised += 1
return resilient, compromised
def run_tests(m_vals):
results = {}
print()
for i, m in enumerate(m_vals):
print(f"({i+1}/{len(m_vals)}) m = {m}")
key = (n, m, L, k)
results[key] = run_test(n, m)
print("\nresults:", results)
return results
def worker(stop_event):
while not stop_event.is_set():
m_vals = range(0, 500000+1, 25000)
results = run_tests(m_vals)
fname = "data/" + datetime.datetime.now().isoformat()
with open(fname, "w") as f:
f.write(repr(results))
if __name__ == "__main__":
cpus = os.cpu_count() - 1 # leave one core free for other work
event = Event()
workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
for p in workers:
p.start()
print("Workers started.")
try:
while True:
pass
except KeyboardInterrupt:
print("Wrapping up...")
event.set()
for p in workers:
p.join()
print("Worker joined.")
print("Done.")
from thm1 import thm_1 as _thm_1
from cached_memoize import cached_memoize
import matplotlib.pyplot as plt
import numpy as np
import os
thm_1 = cached_memoize(_thm_1)
n = 15000
L = 32
k = 16
def load_data():
d = {}
for dirent in os.scandir("data/"):
with open("data/" + dirent.name) as f:
print("reading", dirent.name)
results = eval(f.read())
for key, value in results.items():
r_old, c_old = d.setdefault(key, (0, 0))
r_new, c_new = value
d[key] = (r_old+r_new, c_old+c_new)
return d
def main(slices=100):
# plots E[R_{L,k}] with fifteen thousand honest peers as the number of
# malicious peers ranges from zero to five hundred thousand
d = load_data()
print("Data loaded.")
t = np.arange(0, 500000, 500000/slices)
t = np.append(t, [500000])
model_curve = [thm_1(L, n, m, k, False) for m in t]
print("Model loaded.")
print(d)
m_vals = sorted(_m for _n, _m, _L, _k in d if (_L, _n, _k) == (L, n, k))
measurements = [d[n, m, L, k] for m in m_vals]
R_vals = [r / (r + c) for r, c in measurements]
print(m_vals)
print(R_vals)
plt.plot(t, model_curve, "g-", label="predicted")
plt.plot(m_vals, R_vals, "bo", label="observed", markerfacecolor='none')
plt.legend(loc="lower right")
plt.axis([0, 500000, 0, 1])
plt.show()
if __name__ == "__main__":
main()
from thm1 import thm_1 as _thm_1
from cached_memoize import cached_memoize
import matplotlib.pyplot as plt
import numpy as np
thm_1 = cached_memoize(_thm_1)
L = 32
def main(net_size=5000, slices=100):
# plots E[R_{L,k}] as a function of the ratio of honest peers to total peers
def get_curve(xs, k):
results = []
for x in xs:
n = round(x*net_size)
m = net_size - n
results.append(thm_1(L, n, m, k, False))
return results
t = np.arange(0, 1, 1/slices)
t = np.append(t, [1])
print("Running the numbers...")
for k, c in zip((2, 4, 8, 16, 32), 'rycbg'):
plt.plot(t, get_curve(t, k), f"{c}-", label=f"k={k}")
print("Plotting...")
plt.legend(loc="lower right")
plt.axis([0, 1, 0, 1.00])
plt.show()
if __name__ == "__main__":
main()
import itertools
class PrefixTreeNode:
# prefix tree implementation, with size-capped buckets of values at leaves
def __init__(self, radix: int, bucket_size: int):
self.bucket_size = bucket_size
self.radix = radix
self.bit = 2**radix
self.contents = []
self.left_child = None # 0 branch
self.right_child = None # 1 branch
def insert(self, addr: int):
if self.contents is None:
if (addr & self.bit) == 0:
self.left_child.insert(addr)
else:
self.right_child.insert(addr)
else:
self.contents.append(addr)
if len(self.contents) > self.bucket_size:
if self.radix == 0:
raise Exception("tried to split leaf bucket. duplicate insert?")
contents = self.contents
self.contents = None
self.left_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
self.right_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
for addr in contents:
self.insert(addr)
def query(self, addr: int, k: int):
return tuple(itertools.islice(self._query(addr), k))
def _query(self, addr):
if self.contents is None:
if (addr & self.bit) == 0:
yield from self.left_child._query(addr)
yield from self.right_child._query(addr)
else:
yield from self.right_child._query(addr)
yield from self.left_child._query(addr)
else:
yield from sorted(self.contents, key=lambda item: item ^ addr)
def make_tree(size):
tree = PrefixTreeNode(L, bucket_size=50)
for _ in range(size):
tree.insert(random.randrange(0, POW_2))
return tree
from scipy.stats import hypergeom, binom
import numpy as np
def thm_1(L, n, m, k, quiet=True, use_binom=False):
"""
Computes E[R_{l,k}] using the method derived in Theorem 1.
For debug output, specify `quiet=False`.
To speed things up at the cost of a slight reduction in accuracy, specify
`use_binom=True`. This parameter causes all hypergeometric distributions to
be approximated by binomial distributions.
"""
# check for the trivial case
if n == 0:
return 0
# let's start by defining functions for some of the proof's identities
def P_N(h): # eqn 1.7
if use_binom:
return (1-binom(n, 2**(h-L)).pmf(0)) / (1-binom(n, 2**(h-L+1)).pmf(0))
else:
return (1-hypergeom(2**L, n, 2**h).pmf(0)) / (1-hypergeom(2**L, n, 2**(h+1)).pmf(0))
def P_E(h): # eqn 1.8
return 1-P_N(h)
def P_A(h, a): # eqn 1.9
if use_binom:
return binom(m, 2**(h-L)).pmf(a)
else:
return hypergeom(2**L, m, 2**h).pmf(a)
# here's the main algorithm
if not quiet:
print(f"thm_1({L}, {n}, {m}, {k})")
if use_binom:
print("Using binomial optimization; results will not be 100% accurate.")
# if not quiet:
# print()
# print("NEST probabilities for h in range(L):")
# print(repr(np.array([P_N(h) for h in range(L)])))
# print()
# R_hk records the calculated expectations E[R_{h,k'} | N_{h+1}] for 0 <= h < L and 0 <= k' <= k
R_hk = np.zeros([L, k+1])
# We'll start by using Equation 1.4 to populate R_hk[0, :].
# Then once the h=0 case is finished we'll progress to h=1 and so on.
# R_hk[0, 0] starts at its correct value: 0, since k=0
if use_binom:
R_hk[0, 1] = 1 - P_A(0, 1) * binom(n-1, 1/2**L).pmf(0) / 2
else:
R_hk[0, 1] = 1 - P_A(0, 1) * hypergeom(2**L, n-1, 1).pmf(0) / 2
R_hk[0, 2:] = 1
# now it's time to get to iteratively work through the h > 1 cases
for h in range(1, L):
for k_prime in range(1, k+1):
# applying Equation 1.2
nonempty_expectation = R_hk[h-1, k_prime]
# applying Equation 1.6
empty_expectation = sum(
P_A(h, a) * R_hk[h-1, k_prime-a]
for a in range(k_prime)
)
# applying Equation 1.5
R_hk[h, k_prime] = (P_N(h) * nonempty_expectation
+ P_E(h) * empty_expectation)
# if not quiet:
# print()
# print("Table of R_hk's values:")
# print(repr(R_hk))
# print()
# applying Equation 1.3 to get the final result
return R_hk[L-1, k]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment