Skip to content

Instantly share code, notes, and snippets.

@Soares
Last active January 17, 2022 14:32
Show Gist options
  • Save Soares/941bdb13233fd0838f1882d148c9ac14 to your computer and use it in GitHub Desktop.
Save Soares/941bdb13233fd0838f1882d148c9ac14 to your computer and use it in GitHub Desktop.
Let's say you have a fair coin, and can flip it at most 300 times. What percentage of the time will tossing the coin generate a sequence that, at some point along the way, supports the hypothesis "the coin is biased 75% towards heads" 20x as much as it supports the hypothesis "the coin is fair"?
# Let's say you have a fair coin.
# Let's say you don't know whether or not it's actually biased towards heads by a certain degree.
# Let's say you can flip it at most a certain number of times.
# What's the probability that, at some point along the way, it looks pretty likely to be biased?
from random import random
from scipy.misc import comb
# The actual bias of the coin
BIAS = 0.5
# The number of times to flip the coin
FLIPS = 300
# All hypotheses will be compared only to the final hypothesis
HYPOTHESES = (0.75, 0.55, 0.5)
# We'll check how often the likelihood ratio goes above these values
THRESHOLDS = (20, 100)
# Ignore extreme odds that occur before at least this much data has been collected:
LARGE_NUMBER = 0
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
RUNS = 10000
def gen_sequence():
for i in range(FLIPS):
yield 'H' if random() < BIAS else 'T'
def odds_sequence(sequence):
n_heads = 0
n_flips = 0
for flip in sequence:
n_flips += 1
if flip == 'H':
n_heads += 1
yield tuple(b**n_heads * (1-b)**(n_flips - n_heads) for b in HYPOTHESES)
def relative_odds(odds, i, j):
return odds[i] / odds[j] if odds[j] > 0 else float('inf')
def most_extreme_odds(i, odds_list):
assert odds_list
rel_odds = [relative_odds(o, i, -1) for o in odds_list]
indexed_odds = list(enumerate(rel_odds))[LARGE_NUMBER:]
max_i, max_odds = max(indexed_odds, key=lambda io: io[1])
min_i, min_odds = min(indexed_odds, key=lambda io: io[1])
return (max_odds, max_i + 1, min_odds, min_i + 1, rel_odds[-1], len(rel_odds))
# Generates a report for a single run of the data.
# (Not used by default, but you can use it yourself to inspect a single run.)
def report(odds_list):
for i in range(len(HYPOTHESES) - 1):
max_odds, max_n, min_odds, min_n, final_odds, final_n = most_extreme_odds(i, odds_list)
print('Odds that the coin was {:2.0f}% biased towards heads (as opposed to {:2.0f}%)'.format(
100.0 * HYPOTHESES[i],
100.0 * HYPOTHESES[-1]))
print('\tMaximum: ({} : 1) after {} flips.'.format(max_odds, max_n))
print('\tMinimum: ({} : 1) after {} flips.'.format(min_odds, min_n))
print('\t Final: ({} : 1) after {} flips.'.format(final_odds, final_n))
def report_threshold_breaches(i, odds_lists):
assert odds_lists
extremities = (most_extreme_odds(i, odds_list) for odds_list in odds_lists)
middle_counters = {t: 0 for t in THRESHOLDS}
middle_trackers = {t: [] for t in THRESHOLDS}
final_counters = {t: 0 for t in THRESHOLDS}
for max_odds, max_n, _, _, final_odds, _ in extremities:
for threshold in THRESHOLDS:
if max_odds >= threshold:
middle_counters[threshold] += 1
middle_trackers[threshold].append(max_n)
if final_odds >= threshold:
final_counters[threshold] += 1
for threshold in THRESHOLDS:
print('Odds for {:2.0f}% bias over {:2.0f}% went above ({} : 1) {} times, which was ~{:0.2f}% of the time'.format(
100.0 * HYPOTHESES[i],
100.0 * HYPOTHESES[-1],
threshold,
middle_counters[threshold],
(100.0 * middle_counters[threshold]) / len(odds_lists)))
print('\tand ended above ({} : 1) odds {} ({:0.0f}%) times.'.format(
threshold,
final_counters[threshold],
(100.0 * final_counters[threshold]) / len(odds_lists)))
print('\tthreshold max happened at around toss number {:0.2f}, on average.'.format(
sum(middle_trackers[threshold]) / len(middle_trackers[threshold])))
def run():
odds_lists = [list(odds_sequence(gen_sequence())) for _ in range(RUNS)]
for i in range(len(HYPOTHESES) - 1):
report_threshold_breaches(i, odds_lists)
print('')
run()
@seisvelas
Copy link

LARGE_NUMBER = 0

This glorious little piece of irony made me chuckle :)

@mathewcohle
Copy link

you might want to update:

- from scipy.misc import comb
+ from scipy.special import comb

https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.comb.html
(or state the version of scipy in which the script is supposed to work :))

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment