import matplotlib.pyplot as plt import numpy import scipy.stats # http://www.medpagetoday.com/Blogs/TheMethodsMan/52171 def calc_reliability(p, power=0.8, frac_true=0.1): """ Given this p-value, power of the test and fraction of hypotheses that are actually true. If we receive a negative result, how probable is it that the hypothesis is false? If we receive a positive result, how probable is it that the hypothesis is true? """ frac_false = 1 - frac_true frac_false_pos = frac_false * p frac_false_neg = frac_false * (1 - p) # false, and undetected frac_true_pos = power * frac_true frac_true_neg = (1 - power) * frac_true # true, but undetected frac_pos_total = frac_true_pos + frac_false_pos frac_neg_total = frac_true_neg + frac_false_neg reliability_pos = frac_true_pos / frac_pos_total reliability_neg = frac_true_neg / frac_neg_total return reliability_pos, reliability_neg # p = prob to have as extreme data if false p = 0.05 results = [] p = numpy.logspace(-3, 0, 400) power = 0.8 frac_true = 0.1 reliability_pos, reliability_neg = calc_reliability(p, power=power, frac_true=frac_true) plt.plot(p, reliability_pos, '-', label='"significant" detection', color='r') plt.plot(p, reliability_neg, '--', label='no significant detection', color='g') #plt.plot(p, 1-p/frac_true, '--') threesigma, twosigma = scipy.stats.norm().cdf([-3,-2]) for threshold, label in [(threesigma, '$3\sigma$'), (twosigma, '$2\sigma$'), (0.01, 0.01), (0.05, 0.05), (0.1, 0.1)]: pos, neg = calc_reliability(threshold, power=power, frac_true=frac_true) plt.vlines(threshold, 0, pos, linestyles=['dashed'], alpha=0.5) plt.text(threshold, pos, '%.0f%% for p<%s' % (pos * 100, label), va='bottom', ha='left', fontsize=8) plt.ylim(0, 1.06) plt.xlabel('p-value') plt.ylabel('Prob. that test gave the right answer') plt.gca().set_xscale('log') plt.legend(loc='lower left', frameon=True) plt.savefig('pvalue.pdf', bbox_inches='tight') plt.savefig('pvalue.png', bbox_inches='tight')