Soares/coin_bias_test.py

## coin_bias_test.py
# Let's say you have a fair coin.
# Let's say you don't know whether or not it's actually biased towards heads by a certain degree.
# Let's say you can flip it at most a certain number of times.
# What's the probability that, at some point along the way, it looks pretty likely to be biased?
from random import random
from scipy.misc import comb

# The actual bias of the coin
BIAS = 0.5
# The number of times to flip the coin
FLIPS = 300
# All hypotheses will be compared only to the final hypothesis
HYPOTHESES = (0.75, 0.55, 0.5)
# We'll check how often the likelihood ratio goes above these values
THRESHOLDS = (20, 100)
# Ignore extreme odds that occur before at least this much data has been collected:
LARGE_NUMBER = 0
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
RUNS = 10000

def gen_sequence():
    for i in range(FLIPS):
        yield 'H' if random() < BIAS else 'T'

def odds_sequence(sequence):
    n_heads = 0
    n_flips = 0
    for flip in sequence:
        n_flips += 1
        if flip == 'H':
            n_heads += 1
        yield tuple(b**n_heads * (1-b)**(n_flips - n_heads) for b in HYPOTHESES)

def relative_odds(odds, i, j):
    return odds[i] / odds[j] if odds[j] > 0 else float('inf')

def most_extreme_odds(i, odds_list):
    assert odds_list
    rel_odds = [relative_odds(o, i, -1) for o in odds_list]
    indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:]

    max_i, max_odds = max(indexed_odds, key=lambda io: io[1])
    min_i, min_odds = min(indexed_odds, key=lambda io: io[1])
    return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds))

# Generates a report for a single run of the data.
# (Not used by default, but you can use it yourself to inspect a single run.)
def report(odds_list):
    for i in range(len(HYPOTHESES) - 1):
        max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list)
        print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format(
            100.0 * HYPOTHESES[i],
            100.0 * HYPOTHESES[-1]))
        print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n))
        print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n))
        print('\t  Final: ({} : 1) after {} flips.'.format(final_odds, final_n))

def report_threshold_breaches(i, odds_lists):
    assert odds_lists
    extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists)
    middle_counters = {t: 0 for t in THRESHOLDS}
    middle_trackers = {t: [] for t in THRESHOLDS}
    final_counters = {t: 0 for t in THRESHOLDS}
    for max_odds, max_n, _, _, final_odds, _ in extremities:
        for threshold in THRESHOLDS:
            if max_odds >= threshold:
                middle_counters[threshold] += 1
                middle_trackers[threshold].append(max_n)
            if final_odds >= threshold:
                final_counters[threshold] += 1
    for threshold in THRESHOLDS:
        print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format(
            100.0 * HYPOTHESES[i],
            100.0 * HYPOTHESES[-1],
            threshold,
            middle_counters[threshold],
            (100.0 * middle_counters[threshold]) / len(odds_lists)))
        print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format(
            threshold,
            final_counters[threshold],
            (100.0 * final_counters[threshold]) / len(odds_lists)))
        print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format(
            sum(middle_trackers[threshold]) / len(middle_trackers[threshold])))

def run():
    odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)]
    for i in range(len(HYPOTHESES) - 1):
        report_threshold_breaches(i, odds_lists)
        print('')

run()
	# Let's say you have a fair coin.
	# Let's say you don't know whether or not it's actually biased towards heads by a certain degree.
	# Let's say you can flip it at most a certain number of times.
	# What's the probability that, at some point along the way, it looks pretty likely to be biased?
	from random import random
	from scipy.misc import comb

	# The actual bias of the coin
	BIAS = 0.5
	# The number of times to flip the coin
	FLIPS = 300
	# All hypotheses will be compared only to the final hypothesis
	HYPOTHESES = (0.75, 0.55, 0.5)
	# We'll check how often the likelihood ratio goes above these values
	THRESHOLDS = (20, 100)
	# Ignore extreme odds that occur before at least this much data has been collected:
	LARGE_NUMBER = 0
	# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
	RUNS = 10000

	def gen_sequence():
	for i in range(FLIPS):
	yield 'H' if random() < BIAS else 'T'

	def odds_sequence(sequence):
	n_heads = 0
	n_flips = 0
	for flip in sequence:
	n_flips += 1
	if flip == 'H':
	n_heads += 1
	yield tuple(b*n_heads (1-b)**(n_flips - n_heads) for b in HYPOTHESES)

	def relative_odds(odds, i, j):
	return odds[i] / odds[j] if odds[j] > 0 else float('inf')

	def most_extreme_odds(i, odds_list):
	assert odds_list
	rel_odds = [relative_odds(o, i, -1) for o in odds_list]
	indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:]

	max_i, max_odds = max(indexed_odds, key=lambda io: io[1])
	min_i, min_odds = min(indexed_odds, key=lambda io: io[1])
	return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds))

	# Generates a report for a single run of the data.
	# (Not used by default, but you can use it yourself to inspect a single run.)
	def report(odds_list):
	for i in range(len(HYPOTHESES) - 1):
	max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list)
	print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format(
	100.0 * HYPOTHESES[i],
	100.0 * HYPOTHESES[-1]))
	print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n))
	print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n))
	print('\t Final: ({} : 1) after {} flips.'.format(final_odds, final_n))

	def report_threshold_breaches(i, odds_lists):
	assert odds_lists
	extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists)
	middle_counters = {t: 0 for t in THRESHOLDS}
	middle_trackers = {t: [] for t in THRESHOLDS}
	final_counters = {t: 0 for t in THRESHOLDS}
	for max_odds, max_n, _, _, final_odds, _ in extremities:
	for threshold in THRESHOLDS:
	if max_odds >= threshold:
	middle_counters[threshold] += 1
	middle_trackers[threshold].append(max_n)
	if final_odds >= threshold:
	final_counters[threshold] += 1
	for threshold in THRESHOLDS:
	print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format(
	100.0 * HYPOTHESES[i],
	100.0 * HYPOTHESES[-1],
	threshold,
	middle_counters[threshold],
	(100.0 * middle_counters[threshold]) / len(odds_lists)))
	print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format(
	threshold,
	final_counters[threshold],
	(100.0 * final_counters[threshold]) / len(odds_lists)))
	print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format(
	sum(middle_trackers[threshold]) / len(middle_trackers[threshold])))

	def run():
	odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)]
	for i in range(len(HYPOTHESES) - 1):
	report_threshold_breaches(i, odds_lists)
	print('')

	run()