tyarkoni/p_hacked_effect_sizes.py

## p_hacked_effect_sizes.py
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
%matplotlib inline

def run_study(step_size=50, max_n=200, num_tests=10, alpha=0.05):
    ''' Run a single study in increments of N until we either (a) achieve
    significance, or (b) hit a maximum sample size. To model p-hacking, we
    conduct num_tests independent tests after each increment of sampling. '''
    X = np.zeros((0, num_tests))
    n = 0
    while n < max_n:
        X = np.r_[X, np.random.normal(size=(step_size, num_tests))]
        t, p = stats.ttest_1samp(X, 0)
        min_ind = np.argmin(p)
        min_p = p[min_ind]
        n += step_size
        if min_p < 0.05:
            break
        # As a null distribution, don't take the smallest p value
        # if there's no sig effect
        min_ind = 0
    return min_p, t[min_ind], n, X[:, min_ind]

def plot_effect_sizes(n_reps=100, min_n=10, **kwargs):
    ''' Sample n_reps studies per the procedure in run_study(), and
    plot the standardized effect size of the result, coloring by
    significance. '''
    studies = [run_study(**kwargs) for i in range(n_reps)]
    # Count sig vs. non-sig
    p = np.array([s[0] for s in studies])
    print("prop. significant studies: %.2f" % (p < 0.05).mean())
    for p, t, n, X in studies:
        # Recode all significant effects in same direction
        if p < .05 and X.mean() < 0:
            X *= -1
        color = 'blue' if p < .05 else 'gray'
        d = [X[:i].mean()/X[:i].std() for i in range(min_n, n)]
        plt.plot(np.arange(min_n, n), d, color=color, lw=2, alpha=0.2)
        plt.hlines(0, min_n, 200, linestyles='--', lw=0.5)
        plt.gcf().set_size_inches(12, 6)

plot_effect_sizes()
	import numpy as np
	from scipy import stats
	import matplotlib.pyplot as plt
	%matplotlib inline

	def run_study(step_size=50, max_n=200, num_tests=10, alpha=0.05):
	''' Run a single study in increments of N until we either (a) achieve
	significance, or (b) hit a maximum sample size. To model p-hacking, we
	conduct num_tests independent tests after each increment of sampling. '''
	X = np.zeros((0, num_tests))
	n = 0
	while n < max_n:
	X = np.r_[X, np.random.normal(size=(step_size, num_tests))]
	t, p = stats.ttest_1samp(X, 0)
	min_ind = np.argmin(p)
	min_p = p[min_ind]
	n += step_size
	if min_p < 0.05:
	break
	# As a null distribution, don't take the smallest p value
	# if there's no sig effect
	min_ind = 0
	return min_p, t[min_ind], n, X[:, min_ind]

	def plot_effect_sizes(n_reps=100, min_n=10, **kwargs):
	''' Sample n_reps studies per the procedure in run_study(), and
	plot the standardized effect size of the result, coloring by
	significance. '''
	studies = [run_study(**kwargs) for i in range(n_reps)]
	# Count sig vs. non-sig
	p = np.array([s[0] for s in studies])
	print("prop. significant studies: %.2f" % (p < 0.05).mean())
	for p, t, n, X in studies:
	# Recode all significant effects in same direction
	if p < .05 and X.mean() < 0:
	X *= -1
	color = 'blue' if p < .05 else 'gray'
	d = [X[:i].mean()/X[:i].std() for i in range(min_n, n)]
	plt.plot(np.arange(min_n, n), d, color=color, lw=2, alpha=0.2)
	plt.hlines(0, min_n, 200, linestyles='--', lw=0.5)
	plt.gcf().set_size_inches(12, 6)

	plot_effect_sizes()