wootfish/cached_memoize.py

## cached_memoize.py
from functools import wraps


def _read_memo(fname="memo"):
    """Load the persisted memo table from ``fname``.

    The file is expected to hold the ``repr`` of a dict, which is the format
    ``_write_memo`` produces.  A missing or blank file yields an empty dict so
    that a first run starts with a cold cache instead of crashing.

    NOTE(review): the contents are passed to ``eval``, which executes
    arbitrary code -- only point this at memo files you created yourself.
    """
    try:
        with open(fname, "r") as f:
            text = f.read()
    except FileNotFoundError:
        # No cache has been written yet.
        return {}

    if not text.strip():
        # eval("") would raise SyntaxError; treat a blank file as "no entries".
        return {}
    return eval(text)


def _write_memo(memo, fname="memo"):
    """Overwrite ``fname`` with the ``repr`` of ``memo``."""
    with open(fname, mode="w") as sink:
        sink.write(f"{memo!r}")


def cached_memoize(func):
    """Decorate ``func`` with a memo table that is persisted to disk.

    The table is loaded once, when the decorator is applied, from the default
    memo file.  Results are keyed by the positional-argument tuple only, and
    the whole table is rewritten after every cache miss.
    """
    cache = _read_memo()

    @wraps(func)
    def wrapper(*args):
        try:
            return cache[args]
        except KeyError:
            pass

        # Cache miss: compute, remember, and persist before returning.
        cache[args] = value = func(*args)
        _write_memo(cache)
        return value

    return wrapper

## get_data.py
from multiprocessing import Process, Event
import os
import random
import datetime
import itertools

from prefixtree import PrefixTreeNode as PrefixTree


# Experiment parameters shared by the functions below:
#   n -- number of defender addresses generated for each run_test call
#   L -- address width in bits (addresses are drawn from range(2**L))
#   k -- number of addresses requested from each tree lookup
n = 15000
L = 32
k = 16


def make_addr_set(size):
    """Return a set of ``size`` distinct random addresses below ``2**L``."""
    chosen = set()
    # Each pass draws exactly as many values as are still missing; collisions
    # leave the set short, so keep topping it up until it is full.
    while len(chosen) < size:
        shortfall = size - len(chosen)
        chosen.update(random.randrange(2**L) for _ in range(shortfall))
    return chosen


def run_test(n, m, samples=20000):
    """Simulate ``samples`` random lookups and tally which ones reach a defender.

    A prefix tree is filled with ``n`` defender and ``m`` attacker addresses.
    For every sample a random target address is looked up and the ``k``
    addresses returned by the tree are inspected.

    Returns ``(resilient, compromised)``: the number of lookups whose result
    contained at least one defender, and the number that contained none.
    """
    defenders = make_addr_set(n)
    attackers = make_addr_set(m)

    # Defenders and attackers may overlap; that is fine, the model accounts
    # for it.
    tree = PrefixTree(L, 50)
    for member in defenders | attackers:
        tree.insert(member)

    tally = {True: 0, False: 0}
    for _ in range(samples):
        target = random.randrange(2**L)
        neighbours = tree.query(target, k)
        reached_defender = not defenders.isdisjoint(neighbours)
        tally[reached_defender] += 1

    return tally[True], tally[False]


def run_tests(m_vals):
    """Run ``run_test`` once for every attacker count in ``m_vals``.

    Progress and the final table are printed.  Returns a dict mapping
    ``(n, m, L, k)`` to the ``(resilient, compromised)`` pair of that run.
    """
    outcomes = {}

    print()
    for i, m in enumerate(m_vals):
        print(f"({i+1}/{len(m_vals)}) m = {m}")
        outcomes[(n, m, L, k)] = run_test(n, m)

    print("\nresults:", outcomes)
    return outcomes


def worker(stop_event):
    """Run full attacker sweeps back to back until ``stop_event`` is set.

    Every sweep covers m = 0, 25000, ..., 500000 and its results dict is
    written (as ``repr``) to a new file under ``data/`` named after the
    current ISO timestamp.  The stop flag is only checked between sweeps, so
    a sweep that has started is always finished and saved.
    """
    # Create the output directory up front so that a completed sweep is never
    # lost to a missing "data/" folder.
    os.makedirs("data", exist_ok=True)

    m_vals = range(0, 500000+1, 25000)
    while not stop_event.is_set():
        results = run_tests(m_vals)

        fname = "data/" + datetime.datetime.now().isoformat()
        with open(fname, "w") as f:
            f.write(repr(results))


if __name__ == "__main__":
    # Leave one core free for other work, but always start at least one
    # worker; os.cpu_count() returns None when the count is unknown.
    cpus = max(1, (os.cpu_count() or 2) - 1)
    event = Event()
    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
    for p in workers:
        p.start()
    print("Workers started.")
    try:
        # Block in short timed waits instead of spinning on `pass`: a busy
        # loop would burn the very core that was supposed to stay free.
        # Nothing sets the event before Ctrl-C, so this waits until then.
        while not event.wait(timeout=1.0):
            pass
    except KeyboardInterrupt:
        print("Wrapping up...")
        event.set()
    for p in workers:
        p.join()
        print("Worker joined.")
    print("Done.")

## plot_curve.py
from thm1 import thm_1 as _thm_1
from cached_memoize import cached_memoize

import matplotlib.pyplot as plt

import numpy as np
import os


# Disk-backed memoized wrapper around the Theorem 1 computation.  Note that
# cached_memoize reads the memo file as soon as it is applied, i.e. when this
# module is imported.
thm_1 = cached_memoize(_thm_1)


# Parameters of the plotted scenario.  They are passed to thm_1 and are also
# used to pick the matching (n, m, L, k) entries out of the measured data.
n = 15000
L = 32
k = 16


def load_data():
    """Merge every result file found under ``data/`` into one dict.

    Each file is expected to hold the ``repr`` of a dict whose values are
    2-tuples of counts; tuples that share a key are summed component-wise
    across all files.

    NOTE(review): files are parsed with ``eval``, so only trusted files may
    be placed in ``data/``.
    """
    totals = {}

    for entry in os.scandir("data/"):
        with open("data/" + entry.name) as fh:
            print("reading", entry.name)
            parsed = eval(fh.read())

            for key, (r_new, c_new) in parsed.items():
                r_old, c_old = totals.get(key, (0, 0))
                totals[key] = (r_old + r_new, c_old + c_new)

    return totals


def main(slices=100):
    """Plot the predicted E[R_{L,k}] curve against the measured data.

    The scenario is fifteen thousand honest peers while the number of
    malicious peers ranges from zero to five hundred thousand.  The model is
    evaluated at ``slices`` + 1 evenly spaced attacker counts.
    """
    d = load_data()
    print("Data loaded.")

    # arange leaves out the end point, so 500000 is appended explicitly.
    step = 500000/slices
    t = np.append(np.arange(0, 500000, step), [500000])

    model_curve = []
    for m in t:
        model_curve.append(thm_1(L, n, m, k, False))
    print("Model loaded.")

    print(d)

    # Keep only the measurements taken with this module's (L, n, k).
    wanted = (L, n, k)
    m_vals = sorted(_m for _n, _m, _L, _k in d if (_L, _n, _k) == wanted)
    measurements = [d[n, m, L, k] for m in m_vals]
    R_vals = []
    for r, c in measurements:
        R_vals.append(r / (r + c))

    print(m_vals)
    print(R_vals)

    plt.plot(t, model_curve, "g-", label="predicted")
    plt.plot(m_vals, R_vals, "bo", label="observed", markerfacecolor='none')

    plt.legend(loc="lower right")
    plt.axis([0, 500000, 0, 1])

    plt.show()


if __name__ == "__main__":
    main()

## plot_ratio.py
from thm1 import thm_1 as _thm_1
from cached_memoize import cached_memoize

import matplotlib.pyplot as plt

import numpy as np


# Disk-backed memoized wrapper around the Theorem 1 computation.  Note that
# cached_memoize reads the memo file as soon as it is applied, i.e. when this
# module is imported.
thm_1 = cached_memoize(_thm_1)


# First argument passed to thm_1 for every curve plotted by main().
L = 32


def main(net_size=5000, slices=100):
    """Plot E[R_{L,k}] as a function of the ratio of honest peers to total peers.

    One curve is drawn for each k in (2, 4, 8, 16, 32); the network always
    has ``net_size`` peers in total and the ratio is sampled at
    ``slices`` + 1 evenly spaced points from 0 to 1.
    """

    def get_curve(xs, k):
        # For each ratio x: n honest peers, the remaining peers malicious.
        honest_counts = (round(x*net_size) for x in xs)
        return [thm_1(L, n, net_size - n, k, False) for n in honest_counts]

    # arange leaves out the end point, so 1 is appended explicitly.
    t = np.append(np.arange(0, 1, 1/slices), [1])

    print("Running the numbers...")

    for k, c in zip((2, 4, 8, 16, 32), 'rycbg'):
        curve = get_curve(t, k)
        plt.plot(t, curve, f"{c}-", label=f"k={k}")

    print("Plotting...")

    plt.legend(loc="lower right")

    plt.axis([0, 1, 0, 1.00])
    plt.show()


if __name__ == "__main__":
    main()

## prefixtree.py
import itertools


class PrefixTreeNode:
    """Binary prefix tree over integer addresses with size-capped leaf buckets.

    A node is either a leaf (``contents`` is a list) or an interior node
    (``contents`` is None and both children exist).  An interior node routes
    an address by testing the bit ``2**radix``: clear goes left, set goes
    right.
    """

    def __init__(self, radix: int, bucket_size: int):
        self.radix = radix
        self.bucket_size = bucket_size
        self.bit = 2**radix      # mask of the address bit this node tests
        self.contents = []       # leaf bucket; becomes None once split
        self.left_child = None   # 0 branch
        self.right_child = None  # 1 branch

    def insert(self, addr: int):
        """Store ``addr``, splitting this leaf when its bucket overflows.

        Raises Exception when a radix-0 leaf would have to be split.
        """
        if self.contents is None:
            # Interior node: hand the address to the child its bit selects.
            child = self.right_child if addr & self.bit else self.left_child
            child.insert(addr)
            return

        bucket = self.contents
        bucket.append(addr)
        if len(bucket) <= self.bucket_size:
            return

        if self.radix == 0:
            raise Exception("tried to split leaf bucket. duplicate insert?")

        # Turn this leaf into an interior node and push the old bucket down;
        # with contents set to None, insert() now routes to the new children.
        self.contents = None
        self.left_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
        self.right_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
        for item in bucket:
            self.insert(item)

    def query(self, addr: int, k: int):
        """Return a tuple of up to ``k`` stored addresses, nearest to ``addr`` first."""
        nearest_first = self._query(addr)
        return tuple(itertools.islice(nearest_first, k))

    def _query(self, addr):
        """Lazily yield stored addresses, visiting the branch matching ``addr`` first."""
        if self.contents is not None:
            # Leaf: order the bucket by XOR distance to the target.
            yield from sorted(self.contents, key=lambda stored: stored ^ addr)
            return

        if addr & self.bit:
            near, far = self.right_child, self.left_child
        else:
            near, far = self.left_child, self.right_child
        yield from near._query(addr)
        yield from far._query(addr)


def make_tree(size, radix=32, bucket_size=50):
    """Build a tree holding ``size`` random addresses below ``2**radix``.

    Addresses are drawn independently, so duplicates are possible.

    NOTE(review): the original body referenced the undefined names ``L`` and
    ``POW_2`` and the unimported ``random`` module, so every call raised
    NameError.  The defaults assume L = 32 and POW_2 = 2**L -- confirm.
    """
    import random  # local import: this module's header only imports itertools

    tree = PrefixTreeNode(radix, bucket_size=bucket_size)
    for _ in range(size):
        tree.insert(random.randrange(0, 2**radix))
    return tree

## thm1.py
from scipy.stats import hypergeom, binom

import numpy as np


def thm_1(L, n, m, k, quiet=True, use_binom=False):
    """
    Computes E[R_{L,k}] using the method derived in Theorem 1.

    The plotting scripts call this with `n` honest peers and `m` malicious
    peers; `L` is the address length in bits (the population size is 2**L)
    and `k` is the number of peers returned by a lookup.

    Returns 0 when `n == 0` or `k == 0`; otherwise the final entry of the
    table of expectations built below.

    To print the call being evaluated, specify `quiet=False`.

    To speed things up at the cost of a slight reduction in accuracy, specify
    `use_binom=True`. This parameter causes all hypergeometric distributions to
    be approximated by binomial distributions.
    """

    # check for the trivial cases; k == 0 would otherwise index past the end
    # of the single-column table below
    if n == 0 or k == 0:
        return 0

    # let's start by defining functions for some of the proof's identities

    def P_N(h):  # eqn 1.7
        if use_binom:
            return (1-binom(n, 2**(h-L)).pmf(0)) / (1-binom(n, 2**(h-L+1)).pmf(0))
        else:
            return (1-hypergeom(2**L, n, 2**h).pmf(0)) / (1-hypergeom(2**L, n, 2**(h+1)).pmf(0))

    def P_A(h, a):  # eqn 1.9
        if use_binom:
            return binom(m, 2**(h-L)).pmf(a)
        else:
            return hypergeom(2**L, m, 2**h).pmf(a)

    # here's the main algorithm

    if not quiet:
        print(f"thm_1({L}, {n}, {m}, {k})")
        if use_binom:
            print("Using binomial optimization; results will not be 100% accurate.")

    # R_hk records the calculated expectations  E[R_{h,k'} | N_{h+1}]  for 0 <= h < L and 0 <= k' <= k
    R_hk = np.zeros([L, k+1])

    # We'll start by using Equation 1.4 to populate R_hk[0, :].
    # Then once the h=0 case is finished we'll progress to h=1 and so on.

    # R_hk[0, 0] starts at its correct value: 0, since k=0

    if use_binom:
        R_hk[0, 1] = 1 - P_A(0, 1) * binom(n-1, 1/2**L).pmf(0) / 2
    else:
        R_hk[0, 1] = 1 - P_A(0, 1) * hypergeom(2**L, n-1, 1).pmf(0) / 2

    R_hk[0, 2:] = 1

    # now it's time to iteratively work through the h >= 1 cases
    for h in range(1, L):
        # These probabilities depend only on h (and a), not on k', and every
        # P_N / P_A call builds new scipy distribution objects.  Evaluate them
        # once per level; the values used below are unchanged.
        p_nonempty = P_N(h)
        p_empty = 1 - p_nonempty  # eqn 1.8
        p_attackers = [P_A(h, a) for a in range(k)]

        for k_prime in range(1, k+1):
            # applying Equation 1.2
            nonempty_expectation = R_hk[h-1, k_prime]

            # applying Equation 1.6
            empty_expectation = sum(
                    p_attackers[a] * R_hk[h-1, k_prime-a]
                    for a in range(k_prime)
                    )

            # applying Equation 1.5
            R_hk[h, k_prime] = (p_nonempty * nonempty_expectation
                                + p_empty * empty_expectation)

    # applying Equation 1.3 to get the final result
    return R_hk[L-1, k]
	from functools import wraps


	def _read_memo(fname="memo"):
	    """Load the persisted memo table from ``fname``.

	    The file is expected to hold the ``repr`` of a dict.  A missing or
	    blank file yields an empty dict so that a first run starts with a cold
	    cache instead of crashing.

	    NOTE(review): the contents are passed to ``eval``, which executes
	    arbitrary code -- only point this at memo files you created yourself.
	    """
	    try:
	        with open(fname, "r") as f:
	            text = f.read()
	    except FileNotFoundError:
	        # No cache has been written yet.
	        return {}

	    if not text.strip():
	        # eval("") would raise SyntaxError; treat a blank file as empty.
	        return {}
	    return eval(text)


	def _write_memo(memo, fname="memo"):
	    """Overwrite ``fname`` with the ``repr`` of ``memo``."""
	    with open(fname, "w") as f:
	        f.write(repr(memo))


	def cached_memoize(func):
	    """Decorate ``func`` with a memo table that is persisted to disk.

	    The table is loaded once, when the decorator is applied, from the
	    default memo file.  Results are keyed by the positional-argument tuple
	    only, and the whole table is rewritten after every cache miss.
	    """
	    memo = _read_memo()

	    @wraps(func)
	    def wrapper(*args):
	        if args not in memo:
	            res = func(*args)
	            memo[args] = res
	            _write_memo(memo)
	        return memo[args]

	    return wrapper
	from multiprocessing import Process, Event
	import os
	import random
	import datetime
	import itertools

	from prefixtree import PrefixTreeNode as PrefixTree


	# Experiment parameters shared by the functions below:
	#   n -- number of defender addresses generated for each run_test call
	#   L -- address width in bits (addresses are drawn from range(2**L))
	#   k -- number of addresses requested from each tree lookup
	n = 15000
	L = 32
	k = 16


	def make_addr_set(size):
	    """Return a set of ``size`` distinct random addresses below ``2**L``."""
	    addrs = set(random.randrange(2**L) for _ in range(size))
	    while len(addrs) < size:
	        # in case the previous draw hit any collisions, top the set up
	        addrs |= set(random.randrange(2**L) for _ in range(size - len(addrs)))
	    return addrs


	def run_test(n, m, samples=20000):
	    """Simulate ``samples`` random lookups and tally which reach a defender.

	    A prefix tree is filled with ``n`` defender and ``m`` attacker
	    addresses.  For every sample a random target address is looked up and
	    the ``k`` addresses returned by the tree are inspected.

	    Returns ``(resilient, compromised)``: the number of lookups whose
	    result contained at least one defender, and the number with none.
	    """
	    defenders = make_addr_set(n)
	    attackers = make_addr_set(m)

	    # note that there is the possibility of overlap between defenders and
	    # attackers - this is ok, the model accounts for it

	    tree = PrefixTree(L, 50)
	    for addr in defenders | attackers:
	        tree.insert(addr)

	    resilient = 0
	    compromised = 0

	    for _ in range(samples):
	        target = random.randrange(2**L)
	        lookup_set = tree.query(target, k)
	        if any(found in defenders for found in lookup_set):
	            resilient += 1
	        else:
	            compromised += 1

	    return resilient, compromised



	def run_tests(m_vals):
	    """Run ``run_test`` once for every attacker count in ``m_vals``.

	    Progress and the final table are printed.  Returns a dict mapping
	    ``(n, m, L, k)`` to the ``(resilient, compromised)`` pair of that run.
	    """
	    results = {}

	    print()
	    for i, m in enumerate(m_vals):
	        print(f"({i+1}/{len(m_vals)}) m = {m}")
	        key = (n, m, L, k)
	        results[key] = run_test(n, m)

	    print("\nresults:", results)
	    return results


	def worker(stop_event):
	    """Run full attacker sweeps back to back until ``stop_event`` is set.

	    Every sweep covers m = 0, 25000, ..., 500000 and its results dict is
	    written (as ``repr``) to a new file under ``data/`` named after the
	    current ISO timestamp.  The stop flag is only checked between sweeps.
	    """
	    # Create the output directory up front so that a completed sweep is
	    # never lost to a missing "data/" folder.
	    os.makedirs("data", exist_ok=True)

	    m_vals = range(0, 500000+1, 25000)
	    while not stop_event.is_set():
	        results = run_tests(m_vals)

	        fname = "data/" + datetime.datetime.now().isoformat()
	        with open(fname, "w") as f:
	            f.write(repr(results))


	if __name__ == "__main__":
	    # Leave one core free for other work, but always start at least one
	    # worker; os.cpu_count() returns None when the count is unknown.
	    cpus = max(1, (os.cpu_count() or 2) - 1)
	    event = Event()
	    workers = [Process(target=worker, args=(event,)) for _ in range(cpus)]
	    for p in workers:
	        p.start()
	    print("Workers started.")
	    try:
	        # Block in short timed waits instead of spinning on `pass`: a busy
	        # loop would burn the very core that was supposed to stay free.
	        while not event.wait(timeout=1.0):
	            pass
	    except KeyboardInterrupt:
	        print("Wrapping up...")
	        event.set()
	    for p in workers:
	        p.join()
	        print("Worker joined.")
	    print("Done.")
	from thm1 import thm_1 as _thm_1
	from cached_memoize import cached_memoize

	import matplotlib.pyplot as plt

	import numpy as np
	import os


	# Disk-backed memoized wrapper around the Theorem 1 computation.
	thm_1 = cached_memoize(_thm_1)


	# Parameters of the plotted scenario.  They are passed to thm_1 and are
	# also used to pick the matching (n, m, L, k) entries out of the data.
	n = 15000
	L = 32
	k = 16


	def load_data():
	    """Merge every result file found under ``data/`` into one dict.

	    Each file is expected to hold the ``repr`` of a dict whose values are
	    2-tuples of counts; tuples that share a key are summed component-wise
	    across all files.

	    NOTE(review): files are parsed with ``eval``, so only trusted files
	    may be placed in ``data/``.
	    """
	    d = {}

	    for dirent in os.scandir("data/"):
	        with open("data/" + dirent.name) as f:
	            print("reading", dirent.name)
	            results = eval(f.read())

	            for key, value in results.items():
	                r_old, c_old = d.get(key, (0, 0))
	                r_new, c_new = value
	                d[key] = (r_old+r_new, c_old+c_new)

	    return d


	def main(slices=100):
	    """Plot the predicted E[R_{L,k}] curve against the measured data.

	    The scenario is fifteen thousand honest peers while the number of
	    malicious peers ranges from zero to five hundred thousand.  The model
	    is evaluated at ``slices`` + 1 evenly spaced attacker counts.
	    """
	    d = load_data()
	    print("Data loaded.")

	    # arange leaves out the end point, so 500000 is appended explicitly.
	    t = np.arange(0, 500000, 500000/slices)
	    t = np.append(t, [500000])

	    model_curve = [thm_1(L, n, m, k, False) for m in t]
	    print("Model loaded.")

	    print(d)

	    # Keep only the measurements taken with this module's (L, n, k).
	    m_vals = sorted(_m for _n, _m, _L, _k in d if (_L, _n, _k) == (L, n, k))
	    measurements = [d[n, m, L, k] for m in m_vals]
	    R_vals = [r / (r + c) for r, c in measurements]

	    print(m_vals)
	    print(R_vals)

	    plt.plot(t, model_curve, "g-", label="predicted")
	    plt.plot(m_vals, R_vals, "bo", label="observed", markerfacecolor='none')

	    plt.legend(loc="lower right")
	    plt.axis([0, 500000, 0, 1])

	    plt.show()


	if __name__ == "__main__":
	    main()
	import itertools


	class PrefixTreeNode:
	    """Binary prefix tree over integer addresses with size-capped leaf buckets.

	    A node is either a leaf (``contents`` is a list) or an interior node
	    (``contents`` is None and both children exist).  An interior node
	    routes an address by testing the bit ``2**radix``: clear goes left,
	    set goes right.
	    """

	    def __init__(self, radix: int, bucket_size: int):
	        self.bucket_size = bucket_size
	        self.radix = radix
	        self.bit = 2**radix      # mask of the address bit this node tests
	        self.contents = []       # leaf bucket; becomes None once split
	        self.left_child = None   # 0 branch
	        self.right_child = None  # 1 branch

	    def insert(self, addr: int):
	        """Store ``addr``, splitting this leaf when its bucket overflows.

	        Raises Exception when a radix-0 leaf would have to be split.
	        """
	        if self.contents is None:
	            if (addr & self.bit) == 0:
	                self.left_child.insert(addr)
	            else:
	                self.right_child.insert(addr)
	        else:
	            self.contents.append(addr)

	            if len(self.contents) > self.bucket_size:
	                if self.radix == 0:
	                    raise Exception("tried to split leaf bucket. duplicate insert?")

	                # Become an interior node and push the old bucket down;
	                # insert() now routes to the freshly created children.
	                contents = self.contents
	                self.contents = None
	                self.left_child = PrefixTreeNode(self.radix - 1, self.bucket_size)
	                self.right_child = PrefixTreeNode(self.radix - 1, self.bucket_size)

	                for item in contents:
	                    self.insert(item)

	    def query(self, addr: int, k: int):
	        """Return a tuple of up to ``k`` stored addresses, nearest to ``addr`` first."""
	        return tuple(itertools.islice(self._query(addr), k))

	    def _query(self, addr):
	        """Lazily yield stored addresses, visiting the branch matching ``addr`` first."""
	        if self.contents is None:
	            if (addr & self.bit) == 0:
	                yield from self.left_child._query(addr)
	                yield from self.right_child._query(addr)
	            else:
	                yield from self.right_child._query(addr)
	                yield from self.left_child._query(addr)
	        else:
	            # Leaf: order the bucket by XOR distance to the target.
	            yield from sorted(self.contents, key=lambda item: item ^ addr)


	def make_tree(size, radix=32, bucket_size=50):
	    """Build a tree holding ``size`` random addresses below ``2**radix``.

	    Addresses are drawn independently, so duplicates are possible.

	    NOTE(review): the original body referenced the undefined names ``L``
	    and ``POW_2`` and the unimported ``random`` module.  The defaults
	    assume L = 32 and POW_2 = 2**L -- confirm.
	    """
	    import random  # local import: this module's header only imports itertools

	    tree = PrefixTreeNode(radix, bucket_size=bucket_size)
	    for _ in range(size):
	        tree.insert(random.randrange(0, 2**radix))
	    return tree
	from scipy.stats import hypergeom, binom

	import numpy as np



	def thm_1(L, n, m, k, quiet=True, use_binom=False):
	    """
	    Computes E[R_{L,k}] using the method derived in Theorem 1.

	    Returns 0 when `n == 0` or `k == 0`; otherwise the final entry of the
	    table of expectations built below.

	    To print the call being evaluated, specify `quiet=False`.

	    To speed things up at the cost of a slight reduction in accuracy, specify
	    `use_binom=True`. This parameter causes all hypergeometric distributions to
	    be approximated by binomial distributions.
	    """

	    # check for the trivial cases; k == 0 would otherwise index past the
	    # end of the single-column table below
	    if n == 0 or k == 0:
	        return 0

	    # let's start by defining functions for some of the proof's identities

	    def P_N(h):  # eqn 1.7
	        if use_binom:
	            return (1-binom(n, 2**(h-L)).pmf(0)) / (1-binom(n, 2**(h-L+1)).pmf(0))
	        else:
	            return (1-hypergeom(2**L, n, 2**h).pmf(0)) / (1-hypergeom(2**L, n, 2**(h+1)).pmf(0))

	    def P_A(h, a):  # eqn 1.9
	        if use_binom:
	            return binom(m, 2**(h-L)).pmf(a)
	        else:
	            return hypergeom(2**L, m, 2**h).pmf(a)

	    # here's the main algorithm

	    if not quiet:
	        print(f"thm_1({L}, {n}, {m}, {k})")
	        if use_binom:
	            print("Using binomial optimization; results will not be 100% accurate.")

	    # R_hk records the calculated expectations  E[R_{h,k'} | N_{h+1}]  for 0 <= h < L and 0 <= k' <= k
	    R_hk = np.zeros([L, k+1])

	    # We'll start by using Equation 1.4 to populate R_hk[0, :].
	    # Then once the h=0 case is finished we'll progress to h=1 and so on.

	    # R_hk[0, 0] starts at its correct value: 0, since k=0

	    if use_binom:
	        R_hk[0, 1] = 1 - P_A(0, 1) * binom(n-1, 1/2**L).pmf(0) / 2
	    else:
	        R_hk[0, 1] = 1 - P_A(0, 1) * hypergeom(2**L, n-1, 1).pmf(0) / 2

	    R_hk[0, 2:] = 1

	    # now it's time to iteratively work through the h >= 1 cases
	    for h in range(1, L):
	        # These probabilities depend only on h (and a), not on k', and
	        # every P_N / P_A call builds new scipy distribution objects.
	        # Evaluate them once per level; the values used are unchanged.
	        p_nonempty = P_N(h)
	        p_empty = 1 - p_nonempty  # eqn 1.8
	        p_attackers = [P_A(h, a) for a in range(k)]

	        for k_prime in range(1, k+1):
	            # applying Equation 1.2
	            nonempty_expectation = R_hk[h-1, k_prime]

	            # applying Equation 1.6
	            empty_expectation = sum(
	                    p_attackers[a] * R_hk[h-1, k_prime-a]
	                    for a in range(k_prime)
	                    )

	            # applying Equation 1.5
	            R_hk[h, k_prime] = (p_nonempty * nonempty_expectation
	                                + p_empty * empty_expectation)

	    # applying Equation 1.3 to get the final result
	    return R_hk[L-1, k]