Skip to content

Instantly share code, notes, and snippets.

@Soares

Soares/coin_bias_test.py

Last active Sep 20, 2020
Embed
What would you like to do?
Let's say you have a fair coin, and can flip it at most 300 times. What percentage of the time will tossing the coin generate a sequence that, at some point along the way, supports the hypothesis "the coin is biased 75% towards heads" 20x as much as it supports the hypothesis "the coin is fair"?
# Let's say you have a fair coin.
# Let's say you don't know whether or not it's actually biased towards heads by a certain degree.
# Let's say you can flip it at most a certain number of times.
# What's the probability that, at some point along the way, it looks pretty likely to be biased?
from random import random
from scipy.misc import comb
# The actual bias of the coin
BIAS = 0.5
# The number of times to flip the coin
FLIPS = 300
# All hypotheses will be compared only to the final hypothesis
HYPOTHESES = (0.75, 0.55, 0.5)
# We'll check how often the likelihood ratio goes above these values
THRESHOLDS = (20, 100)
# Ignore extreme odds that occur before at least this much data has been collected:
LARGE_NUMBER = 0
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
RUNS = 10000
def gen_sequence():
for i in range(FLIPS):
yield 'H' if random() < BIAS else 'T'
def odds_sequence(sequence):
n_heads = 0
n_flips = 0
for flip in sequence:
n_flips += 1
if flip == 'H':
n_heads += 1
yield tuple(b**n_heads * (1-b)**(n_flips - n_heads) for b in HYPOTHESES)
def relative_odds(odds, i, j):
return odds[i] / odds[j] if odds[j] > 0 else float('inf')
def most_extreme_odds(i, odds_list):
assert odds_list
rel_odds = [relative_odds(o, i, -1) for o in odds_list]
indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:]
max_i, max_odds = max(indexed_odds, key=lambda io: io[1])
min_i, min_odds = min(indexed_odds, key=lambda io: io[1])
return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds))
# Generates a report for a single run of the data.
# (Not used by default, but you can use it yourself to inspect a single run.)
def report(odds_list):
for i in range(len(HYPOTHESES) - 1):
max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list)
print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format(
100.0 * HYPOTHESES[i],
100.0 * HYPOTHESES[-1]))
print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n))
print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n))
print('\t Final: ({} : 1) after {} flips.'.format(final_odds, final_n))
def report_threshold_breaches(i, odds_lists):
assert odds_lists
extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists)
middle_counters = {t: 0 for t in THRESHOLDS}
middle_trackers = {t: [] for t in THRESHOLDS}
final_counters = {t: 0 for t in THRESHOLDS}
for max_odds, max_n, _, _, final_odds, _ in extremities:
for threshold in THRESHOLDS:
if max_odds >= threshold:
middle_counters[threshold] += 1
middle_trackers[threshold].append(max_n)
if final_odds >= threshold:
final_counters[threshold] += 1
for threshold in THRESHOLDS:
print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format(
100.0 * HYPOTHESES[i],
100.0 * HYPOTHESES[-1],
threshold,
middle_counters[threshold],
(100.0 * middle_counters[threshold]) / len(odds_lists)))
print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format(
threshold,
final_counters[threshold],
(100.0 * final_counters[threshold]) / len(odds_lists)))
print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format(
sum(middle_trackers[threshold]) / len(middle_trackers[threshold])))
def run():
odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)]
for i in range(len(HYPOTHESES) - 1):
report_threshold_breaches(i, odds_lists)
print('')
run()
@3dots

This comment has been minimized.

Copy link

@3dots 3dots commented Oct 10, 2018

odds_sequence(sequence): is actually Probability Sequence based on the current sequence of flips. i.e. how likely it is that this exact sequence of flips would occur given this supposed hypothesized bias.
Reading about odds in https://arbital.com/p/bayes_extraordinary_claims/?pathId=41159, odds are usually a ratio, like 3 : 2.
Your thing is returning 3/(2+3) = 0.6 kind of thing, not the 'odds' per se.

@seisvelas

This comment has been minimized.

Copy link

@seisvelas seisvelas commented Aug 19, 2019

LARGE_NUMBER = 0

This glorious little piece of irony made me chuckle :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.