ronentk/toy_hashing_demo.py

## toy_hashing_demo.py
# -*- coding: utf-8 -*-
"""
Toy Bitcoin Mining demo
"""
# Not using SHA-256 as the Skein cryptographic hash function hashes input to arbitrary
# size output, and for demonstration purposes 8 bits are enough.
from skein import skein512
import codecs
import struct
import random
import numpy as np
import matplotlib.pyplot as plt

# Total variation distance measure for distance between distributions
# https://en.wikipedia.org/wiki/Total_variation_distance_of_probability_measures
def total_variation_distance(p,q):
    return 0.5 * (np.linalg.norm(p-q, ord=1))

M = 256 # Number of bins to hash to (8 bit output length)
T = 8 # Target
D = int(M / T)

tvds = []
successes = []
trial_lengths = [1000,5000, 10000, 50000, 100000, 500000, 1000000, 5000000, 10000000]
for trials in trial_lengths:
    results = []
    # This is "toy" mining
    for i in range(trials):
        # Search for a random 32-bit nonce which we append to the rest of the fields
        # to get an 80-byte header (header based on example at
        # https://en.bitcoin.it/wiki/Block_hashing_algorithm)
        nonce_hex = struct.pack(">I", random.getrandbits(32))
        target_hex = struct.pack(">b", T)
        header_hex = ("01000000" +
         "81cd02ab7e569e8bcd9317e2fe99f2de44d49ab2b8851ba4a308000000000000" +
         "e320b6c2fffc8d750423db8b1eb942ae710e951ed797f7affc8892b0f1fc122b" +
         "c7f5d74d" +
         codecs.encode(target_hex, 'hex_codec').decode('utf-8') +
         codecs.encode(nonce_hex, 'hex_codec').decode('utf-8')
         )
        header_bin = codecs.decode(header_hex, 'hex')

        # Hash the header. In Bitcoin, SHA-256 would be applied
        # twice in succession
        h = skein512(header_bin, digest_bits=int(np.log2(M)))
        # Convert output to integer (remember, it will be in range 0-255)
        n = int(h.hexdigest(),16) # conversion from base 16
        results.append(n)


    # Get histogram and plot counts
    counts, bins, patches = plt.hist(results, bins=M)

    # Find distance from empirical distribution to uniform (theoretic)
    # distribution using total variation distance measure
    emp_bin_probs = counts / trials
    ideal_bin_probs = np.ones(M) / M
    tvd = total_variation_distance(emp_bin_probs,ideal_bin_probs)
    tvds.append(tvd)

    # Find number of "golden-nonces"
    ns = np.sum(counts[:T]) # number of trials with hash < T
    successes.append(float(ns)/trials)

# Plot mean
emp_mean_count = np.mean(counts)
plt.plot(np.arange(M),np.ones(M)*emp_mean_count)

plt.xlabel('Bins')
plt.ylabel('Count')

plt.figure()
plt.plot(trial_lengths,tvds)
plt.xlabel('Number of trials')
plt.ylabel('Total Variation Distance')

plt.figure()
plt.plot(trial_lengths, 1.0 / np.array(successes))
plt.plot(trial_lengths,np.ones(len(trial_lengths))*D, color='red'  )
plt.show()
plt.xlabel('Number of trials')
plt.ylabel('Avg. trials per "golden-nonce"')

print ("In theory- success on average every %.5f trials" % (D) )
	# -- coding: utf-8 --
	"""
	Toy Bitcoin Mining demo
	"""
	# Not using SHA-256 as the Skein cryptographic hash function hashes input to arbitrary
	# size output, and for demonstration purposes 8 bits are enough.
	from skein import skein512
	import codecs
	import struct
	import random
	import numpy as np
	import matplotlib.pyplot as plt

	# Total variation distance measure for distance between distributions
	# https://en.wikipedia.org/wiki/Total_variation_distance_of_probability_measures
	def total_variation_distance(p,q):
	return 0.5 * (np.linalg.norm(p-q, ord=1))

	M = 256 # Number of bins to hash to (8 bit output length)
	T = 8 # Target
	D = int(M / T)

	tvds = []
	successes = []
	trial_lengths = [1000,5000, 10000, 50000, 100000, 500000, 1000000, 5000000, 10000000]
	for trials in trial_lengths:
	results = []
	# This is "toy" mining
	for i in range(trials):
	# Search for a random 32-bit nonce which we append to the rest of the fields
	# to get an 80-byte header (header based on example at
	# https://en.bitcoin.it/wiki/Block_hashing_algorithm)
	nonce_hex = struct.pack(">I", random.getrandbits(32))
	target_hex = struct.pack(">b", T)
	header_hex = ("01000000" +
	"81cd02ab7e569e8bcd9317e2fe99f2de44d49ab2b8851ba4a308000000000000" +
	"e320b6c2fffc8d750423db8b1eb942ae710e951ed797f7affc8892b0f1fc122b" +
	"c7f5d74d" +
	codecs.encode(target_hex, 'hex_codec').decode('utf-8') +
	codecs.encode(nonce_hex, 'hex_codec').decode('utf-8')
	)
	header_bin = codecs.decode(header_hex, 'hex')

	# Hash the header. In Bitcoin, SHA-256 would be applied
	# twice in succession
	h = skein512(header_bin, digest_bits=int(np.log2(M)))
	# Convert output to integer (remember, it will be in range 0-255)
	n = int(h.hexdigest(),16) # conversion from base 16
	results.append(n)


	# Get histogram and plot counts
	counts, bins, patches = plt.hist(results, bins=M)

	# Find distance from empirical distribution to uniform (theoretic)
	# distribution using total variation distance measure
	emp_bin_probs = counts / trials
	ideal_bin_probs = np.ones(M) / M
	tvd = total_variation_distance(emp_bin_probs,ideal_bin_probs)
	tvds.append(tvd)

	# Find number of "golden-nonces"
	ns = np.sum(counts[:T]) # number of trials with hash < T
	successes.append(float(ns)/trials)

	# Plot mean
	emp_mean_count = np.mean(counts)
	plt.plot(np.arange(M),np.ones(M)*emp_mean_count)

	plt.xlabel('Bins')
	plt.ylabel('Count')

	plt.figure()
	plt.plot(trial_lengths,tvds)
	plt.xlabel('Number of trials')
	plt.ylabel('Total Variation Distance')

	plt.figure()
	plt.plot(trial_lengths, 1.0 / np.array(successes))
	plt.plot(trial_lengths,np.ones(len(trial_lengths))*D, color='red' )
	plt.show()
	plt.xlabel('Number of trials')
	plt.ylabel('Avg. trials per "golden-nonce"')

	print ("In theory- success on average every %.5f trials" % (D) )