Created
July 3, 2016 19:49
-
-
Save Soares/4955bb9268129476262b28e32b8ec979 to your computer and use it in GitHub Desktop.
Let's say you have a coin that you can flip at most 300 times, and you want to generate a "statistically significant" result, and you're willing to stop as soon as the data looks like it's going your way. How often will you be able to get p < 0.05? p < 0.01?
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Question: Let's say you have a coin, and you are willing to flip it a bunch of times, | |
# and you're going to stop as soon as you get a "statistically significant" bias in favor of heads. | |
# Let's say you are willing to flip it at most 300 times. | |
# What's the probability that you can get your statistically significant result? | |
from sys import stdout | |
from random import random | |
from scipy.misc import comb | |
# The actual bias of the coin | |
BIAS = 0.5 | |
# The null hypothesis bias of the coin | |
NULL = 0.5 | |
# The maximum number of times you can flip the coin | |
FLIPS = 300 | |
# We'll attempt to p-hack down to these thresholds. | |
THRESHOLDS = (0.05, 0.01) | |
# We won't count it against p-values if they're hackable using a sample size smaller than this. | |
LARGE_NUMBER = 30 | |
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds. | |
RUNS = 1000 | |
def gen_sequence(): | |
for i in range(FLIPS): | |
yield 'H' if random() < BIAS else 'T' | |
def phack(data): | |
n_heads = 0 | |
n_flips = 0 | |
seq_prob = 1 | |
phacks = {} | |
for flip in data: | |
n_flips += 1 | |
seq_prob *= NULL | |
if flip == 'H': | |
n_heads += 1 | |
if n_flips < LARGE_NUMBER: | |
continue | |
p_value = seq_prob * sum(comb(n_flips, k) for k in range(n_heads, n_flips + 1)) | |
for threshold in THRESHOLDS: | |
if threshold not in phacks and p_value <= threshold: | |
phacks[threshold] = (p_value, n_flips) | |
if len(phacks) == len(THRESHOLDS): | |
break | |
return phacks | |
# Generates a report for a single run of the data. | |
# (Not used by default, but you can use it yourself to inspect a single run.) | |
def report(data): | |
phacks = phack(data) | |
for threshold in THRESHOLDS: | |
if threshold in phacks: | |
print('Hacked! Threshold p<{} with p={0.3f} hit after {} flips.'.format( | |
threshold, | |
phacks[threshold][0], | |
phacks[threshold][1])) | |
else: | |
print('Failed to hit the p<{} threshold.'.format(threshold)) | |
def report_hack_frequency(data_lists): | |
hacks = [] | |
for data in data_lists: | |
print('.', end='') | |
stdout.flush() | |
hacks.append(phack(data)) | |
print('') | |
threshold_counters = {threshold: 0 for threshold in THRESHOLDS} | |
threshold_times = {threshold: [] for threshold in THRESHOLDS} | |
for hack in hacks: | |
for threshold in THRESHOLDS: | |
if threshold in hack: | |
threshold_counters[threshold] += 1 | |
threshold_times[threshold].append(hack[threshold][1]) | |
for threshold in THRESHOLDS: | |
print('Hacked at the p<{} threshold {} times (~{:0.1f}%)'.format( | |
threshold, | |
threshold_counters[threshold], | |
100 * threshold_counters[threshold] / len(data_lists))) | |
print('\tHack occured after {:0.2f} tosses, on average.'.format( | |
sum(threshold_times[threshold]) / len(threshold_times[threshold]))) | |
def run(): | |
data_lists = [list(gen_sequence()) for _ in range(RUNS)] | |
report_hack_frequency(data_lists) | |
run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment