Soares/phacker.py

## phacker.py
# Question: Let's say you have a coin, and you are willing to flip it a bunch of times,
# and you're going to stop as soon as you get a "statistically significant" bias in favor of heads.
# Let's say you are willing to flip it at most 300 times.
# What's the probability that you can get your statistically significant result?
from sys import stdout
from random import random
from scipy.misc import comb

# The actual bias of the coin
BIAS = 0.5
# The null hypothesis bias of the coin
NULL = 0.5
# The maximum number of times you can flip the coin
FLIPS = 300
# We'll attempt to p-hack down to these thresholds.
THRESHOLDS = (0.05, 0.01)
# We won't count it against p-values if they're hackable using a sample size smaller than this.
LARGE_NUMBER = 30
# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
RUNS = 1000

def gen_sequence():
    for i in range(FLIPS):
        yield 'H' if random() < BIAS else 'T'

def phack(data):
    n_heads = 0
    n_flips = 0
    seq_prob = 1
    phacks = {}
    for flip in data:
        n_flips += 1
        seq_prob *= NULL
        if flip == 'H':
            n_heads += 1
        if n_flips < LARGE_NUMBER:
            continue
        p_value = seq_prob * sum(comb(n_flips, k) for k in range(n_heads, n_flips + 1))
        for threshold in THRESHOLDS:
            if threshold not in phacks and p_value <= threshold:
                phacks[threshold] = (p_value, n_flips)
        if len(phacks) == len(THRESHOLDS):
            break
    return phacks

# Generates a report for a single run of the data.
# (Not used by default, but you can use it yourself to inspect a single run.)
def report(data):
    phacks = phack(data)
    for threshold in THRESHOLDS:
        if threshold in phacks:
            print('Hacked! Threshold p<{} with p={0.3f} hit after {} flips.'.format(
                threshold,
                phacks[threshold][0],
                phacks[threshold][1]))
        else:
            print('Failed to hit the p<{} threshold.'.format(threshold))

def report_hack_frequency(data_lists):
    hacks = []
    for data in data_lists:
        print('.', end='')
        stdout.flush()
        hacks.append(phack(data))
    print('')
    threshold_counters = {threshold: 0 for threshold in THRESHOLDS}
    threshold_times = {threshold: [] for threshold in THRESHOLDS}
    for hack in hacks:
        for threshold in THRESHOLDS:
            if threshold in hack:
                threshold_counters[threshold] += 1
                threshold_times[threshold].append(hack[threshold][1])
    for threshold in THRESHOLDS:
        print('Hacked at the p<{} threshold {} times (~{:0.1f}%)'.format(
            threshold,
            threshold_counters[threshold],
            100 * threshold_counters[threshold] / len(data_lists)))
        print('\tHack occured after {:0.2f} tosses, on average.'.format(
            sum(threshold_times[threshold]) / len(threshold_times[threshold])))

def run():
    data_lists = [list(gen_sequence()) for _ in range(RUNS)]
    report_hack_frequency(data_lists)

run()
	# Question: Let's say you have a coin, and you are willing to flip it a bunch of times,
	# and you're going to stop as soon as you get a "statistically significant" bias in favor of heads.
	# Let's say you are willing to flip it at most 300 times.
	# What's the probability that you can get your statistically significant result?
	from sys import stdout
	from random import random
	from scipy.misc import comb

	# The actual bias of the coin
	BIAS = 0.5
	# The null hypothesis bias of the coin
	NULL = 0.5
	# The maximum number of times you can flip the coin
	FLIPS = 300
	# We'll attempt to p-hack down to these thresholds.
	THRESHOLDS = (0.05, 0.01)
	# We won't count it against p-values if they're hackable using a sample size smaller than this.
	LARGE_NUMBER = 30
	# We'll run this many squences of FLIPS flips to approximate the probability of exceeding the above thresholds.
	RUNS = 1000

	def gen_sequence():
	for i in range(FLIPS):
	yield 'H' if random() < BIAS else 'T'

	def phack(data):
	n_heads = 0
	n_flips = 0
	seq_prob = 1
	phacks = {}
	for flip in data:
	n_flips += 1
	seq_prob *= NULL
	if flip == 'H':
	n_heads += 1
	if n_flips < LARGE_NUMBER:
	continue
	p_value = seq_prob * sum(comb(n_flips, k) for k in range(n_heads, n_flips + 1))
	for threshold in THRESHOLDS:
	if threshold not in phacks and p_value <= threshold:
	phacks[threshold] = (p_value, n_flips)
	if len(phacks) == len(THRESHOLDS):
	break
	return phacks

	# Generates a report for a single run of the data.
	# (Not used by default, but you can use it yourself to inspect a single run.)
	def report(data):
	phacks = phack(data)
	for threshold in THRESHOLDS:
	if threshold in phacks:
	print('Hacked! Threshold p<{} with p={0.3f} hit after {} flips.'.format(
	threshold,
	phacks[threshold][0],
	phacks[threshold][1]))
	else:
	print('Failed to hit the p<{} threshold.'.format(threshold))

	def report_hack_frequency(data_lists):
	hacks = []
	for data in data_lists:
	print('.', end='')
	stdout.flush()
	hacks.append(phack(data))
	print('')
	threshold_counters = {threshold: 0 for threshold in THRESHOLDS}
	threshold_times = {threshold: [] for threshold in THRESHOLDS}
	for hack in hacks:
	for threshold in THRESHOLDS:
	if threshold in hack:
	threshold_counters[threshold] += 1
	threshold_times[threshold].append(hack[threshold][1])
	for threshold in THRESHOLDS:
	print('Hacked at the p<{} threshold {} times (~{:0.1f}%)'.format(
	threshold,
	threshold_counters[threshold],
	100 * threshold_counters[threshold] / len(data_lists)))
	print('\tHack occured after {:0.2f} tosses, on average.'.format(
	sum(threshold_times[threshold]) / len(threshold_times[threshold])))

	def run():
	data_lists = [list(gen_sequence()) for _ in range(RUNS)]
	report_hack_frequency(data_lists)

	run()