JohannesBuchner/pvalue.py

## pvalue.py
import matplotlib.pyplot as plt
import numpy
import scipy.stats

# http://www.medpagetoday.com/Blogs/TheMethodsMan/52171

def calc_reliability(p, power=0.8, frac_true=0.1):
	"""
	Given this p-value, power of the test and fraction of hypotheses that
	are actually true.
	If we receive a negative result, how probable is it that the hypothesis
	is false?
	If we receive a positive result, how probable is it that the hypothesis
	is true?
	"""
	frac_false = 1 - frac_true
	frac_false_pos = frac_false * p
	frac_false_neg = frac_false * (1 - p) # false, and undetected
	frac_true_pos = power * frac_true
	frac_true_neg = (1 - power) * frac_true # true, but undetected

	frac_pos_total = frac_true_pos + frac_false_pos
	frac_neg_total = frac_true_neg + frac_false_neg

	reliability_pos = frac_true_pos / frac_pos_total
	reliability_neg = frac_true_neg / frac_neg_total
	return reliability_pos, reliability_neg

# p = prob to have as extreme data if false

p = 0.05

results = []
p = numpy.logspace(-3, 0, 400)
power = 0.8
frac_true = 0.1
reliability_pos, reliability_neg = calc_reliability(p, power=power, frac_true=frac_true)

plt.plot(p, reliability_pos, '-', label='"significant" detection', color='r')
plt.plot(p, reliability_neg, '--', label='no significant detection', color='g')
#plt.plot(p, 1-p/frac_true, '--')
threesigma, twosigma = scipy.stats.norm().cdf([-3,-2])
for threshold, label in [(threesigma, '$3\sigma$'), (twosigma, '$2\sigma$'), (0.01, 0.01), (0.05, 0.05), (0.1, 0.1)]:
	pos, neg = calc_reliability(threshold, power=power, frac_true=frac_true)
	plt.vlines(threshold, 0, pos, linestyles=['dashed'], alpha=0.5)
	plt.text(threshold, pos, '%.0f%% for p<%s' % (pos * 100, label), va='bottom', ha='left', fontsize=8)

plt.ylim(0, 1.06)
plt.xlabel('p-value')
plt.ylabel('Prob. that test gave the right answer')
plt.gca().set_xscale('log')
plt.legend(loc='lower left', frameon=True)
plt.savefig('pvalue.pdf', bbox_inches='tight')
plt.savefig('pvalue.png', bbox_inches='tight')
	import matplotlib.pyplot as plt
	import numpy
	import scipy.stats

	# http://www.medpagetoday.com/Blogs/TheMethodsMan/52171

	def calc_reliability(p, power=0.8, frac_true=0.1):
	"""
	Given this p-value, power of the test and fraction of hypotheses that
	are actually true.
	If we receive a negative result, how probable is it that the hypothesis
	is false?
	If we receive a positive result, how probable is it that the hypothesis
	is true?
	"""
	frac_false = 1 - frac_true
	frac_false_pos = frac_false * p
	frac_false_neg = frac_false * (1 - p) # false, and undetected
	frac_true_pos = power * frac_true
	frac_true_neg = (1 - power) * frac_true # true, but undetected

	frac_pos_total = frac_true_pos + frac_false_pos
	frac_neg_total = frac_true_neg + frac_false_neg

	reliability_pos = frac_true_pos / frac_pos_total
	reliability_neg = frac_true_neg / frac_neg_total
	return reliability_pos, reliability_neg

	# p = prob to have as extreme data if false

	p = 0.05

	results = []
	p = numpy.logspace(-3, 0, 400)
	power = 0.8
	frac_true = 0.1
	reliability_pos, reliability_neg = calc_reliability(p, power=power, frac_true=frac_true)

	plt.plot(p, reliability_pos, '-', label='"significant" detection', color='r')
	plt.plot(p, reliability_neg, '--', label='no significant detection', color='g')
	#plt.plot(p, 1-p/frac_true, '--')
	threesigma, twosigma = scipy.stats.norm().cdf([-3,-2])
	for threshold, label in [(threesigma, '$3\sigma$'), (twosigma, '$2\sigma$'), (0.01, 0.01), (0.05, 0.05), (0.1, 0.1)]:
	pos, neg = calc_reliability(threshold, power=power, frac_true=frac_true)
	plt.vlines(threshold, 0, pos, linestyles=['dashed'], alpha=0.5)
	plt.text(threshold, pos, '%.0f%% for p<%s' % (pos * 100, label), va='bottom', ha='left', fontsize=8)

	plt.ylim(0, 1.06)
	plt.xlabel('p-value')
	plt.ylabel('Prob. that test gave the right answer')
	plt.gca().set_xscale('log')
	plt.legend(loc='lower left', frameon=True)
	plt.savefig('pvalue.pdf', bbox_inches='tight')
	plt.savefig('pvalue.png', bbox_inches='tight')