Skip to content

Instantly share code, notes, and snippets.

@Soares
Created July 3, 2016 19:49
Show Gist options
  • Save Soares/4955bb9268129476262b28e32b8ec979 to your computer and use it in GitHub Desktop.
Save Soares/4955bb9268129476262b28e32b8ec979 to your computer and use it in GitHub Desktop.
Let's say you have a coin that you can flip at most 300 times, and you want to generate a "statistically significant" result, and you're willing to stop as soon as the data looks like it's going your way. How often will you be able to get p < 0.05? p < 0.01?
# Question: Let's say you have a coin, and you are willing to flip it a bunch of times,
# and you're going to stop as soon as you get a "statistically significant" bias in favor of heads.
# Let's say you are willing to flip it at most 300 times.
# What's the probability that you can get your statistically significant result?
from sys import stdout
from random import random
from scipy.misc import comb
# The actual bias of the coin
BIAS = 0.5
# The null hypothesis bias of the coin
NULL = 0.5
# The maximum number of times you can flip the coin
FLIPS = 300
# We'll attempt to p-hack down to these thresholds.
THRESHOLDS = (0.05, 0.01)
# We won't count it against p-values if they're hackable using a sample size smaller than this.
LARGE_NUMBER = 30
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
RUNS = 1000
def gen_sequence():
for i in range(FLIPS):
yield 'H' if random() < BIAS else 'T'
def phack(data):
n_heads = 0
n_flips = 0
seq_prob = 1
phacks = {}
for flip in data:
n_flips += 1
seq_prob *= NULL
if flip == 'H':
n_heads += 1
if n_flips < LARGE_NUMBER:
continue
p_value = seq_prob * sum(comb(n_flips, k) for k in range(n_heads, n_flips + 1))
for threshold in THRESHOLDS:
if threshold not in phacks and p_value <= threshold:
phacks[threshold] = (p_value, n_flips)
if len(phacks) == len(THRESHOLDS):
break
return phacks
# Generates a report for a single run of the data.
# (Not used by default, but you can use it yourself to inspect a single run.)
def report(data):
phacks = phack(data)
for threshold in THRESHOLDS:
if threshold in phacks:
print('Hacked! Threshold p<{} with p={0.3f} hit after {} flips.'.format(
threshold,
phacks[threshold][0],
phacks[threshold][1]))
else:
print('Failed to hit the p<{} threshold.'.format(threshold))
def report_hack_frequency(data_lists):
hacks = []
for data in data_lists:
print('.', end='')
stdout.flush()
hacks.append(phack(data))
print('')
threshold_counters = {threshold: 0 for threshold in THRESHOLDS}
threshold_times = {threshold: [] for threshold in THRESHOLDS}
for hack in hacks:
for threshold in THRESHOLDS:
if threshold in hack:
threshold_counters[threshold] += 1
threshold_times[threshold].append(hack[threshold][1])
for threshold in THRESHOLDS:
print('Hacked at the p<{} threshold {} times (~{:0.1f}%)'.format(
threshold,
threshold_counters[threshold],
100 * threshold_counters[threshold] / len(data_lists)))
print('\tHack occured after {:0.2f} tosses, on average.'.format(
sum(threshold_times[threshold]) / len(threshold_times[threshold])))
def run():
data_lists = [list(gen_sequence()) for _ in range(RUNS)]
report_hack_frequency(data_lists)
run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment