Created
June 20, 2015 19:00
-
-
Save JohannesBuchner/7ed8966cfde16b7f8ebe to your computer and use it in GitHub Desktop.
Probability that two measurements actually have the same value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy | |
import matplotlib.pyplot as plt | |
import scipy.stats | |
# two gaussian uncertainties with width sigma | |
# at distance delta | |
# what is the probability that they actually have the same value? | |
def compute_bayes(delta, border=5): | |
a = scipy.stats.norm() | |
b = scipy.stats.norm(loc=delta) | |
# integration space starts from the left border of the first gaussian | |
# and ends at the right edge of the second gaussian | |
x = numpy.linspace(-border, border + delta, 4000) | |
# which is where the prior is defined | |
prior = scipy.stats.uniform(loc=-border, scale=2*border + delta) | |
# model single-value: | |
Z1 = numpy.trapz(x=x, y=a.pdf(x) * b.pdf(x) * prior.pdf(x)) | |
# model two-value: | |
# the two integrals for the two values are independent | |
int_x = numpy.trapz(x=x, y=a.pdf(x) * prior.pdf(x)) | |
int_y = numpy.trapz(x=x, y=b.pdf(x) * prior.pdf(x)) | |
Z2 = int_x * int_y | |
return Z2 / (Z2 + Z1) | |
def compute_naive(delta): | |
a = scipy.stats.norm() | |
b = scipy.stats.norm(loc=delta) | |
# fraction of overlapping area | |
# at 5 sigma, the remaining area from each is negligible | |
border = 5 | |
x = numpy.linspace(-border, border + delta, 4000) | |
ap = a.pdf(x) | |
bp = b.pdf(x) | |
mul = numpy.min([ap, bp], axis=0) | |
Z2 = numpy.trapz(x=x, y=mul) | |
return 1 - Z2 | |
def compute_aic(delta): | |
a = scipy.stats.norm() | |
b = scipy.stats.norm(loc=delta) | |
loglike_1 = a.logpdf(0) + b.logpdf(delta) | |
# best case is in the middle | |
peak = delta / 2. | |
loglike_2 = a.logpdf(peak) + b.logpdf(peak) | |
deltachi2 = -2 * (loglike_2 - loglike_1) | |
# parameters are just the location | |
nparam2 = 2 | |
nparam1 = 1 | |
return deltachi2 - 2 * (nparam2 - nparam1) | |
def compute_bic(delta): | |
a = scipy.stats.norm() | |
b = scipy.stats.norm(loc=delta) | |
loglike_1 = a.logpdf(0) + b.logpdf(delta) | |
# best case is in the middle | |
peak = delta / 2. | |
loglike_2 = a.logpdf(peak) + b.logpdf(peak) | |
deltachi2 = -2 * (loglike_2 - loglike_1) | |
# parameters are just the location | |
nparam2 = 2 | |
nparam1 = 1 | |
nsample = 2 | |
return deltachi2 - numpy.log(nsample) * (nparam2 - nparam1) | |
def compute_bic_prob(delta): | |
a = scipy.stats.norm() | |
b = scipy.stats.norm(loc=delta) | |
loglike_1 = a.logpdf(0) + b.logpdf(delta) | |
# best case is in the middle | |
peak = delta / 2. | |
loglike_2 = a.logpdf(peak) + b.logpdf(peak) | |
# parameters are just the location | |
nparam2 = 2 | |
nparam1 = 1 | |
nsample = 2 | |
bic1 = -2 * loglike_1 - numpy.log(nsample) * nparam1 | |
bic2 = -2 * loglike_2 - numpy.log(nsample) * nparam2 | |
Z1 = -0.5 * numpy.exp(bic1) | |
Z2 = -0.5 * numpy.exp(bic2) | |
return Z2 / (Z2 + Z1) | |
deltas = [0, 1, 2, 3, 4, 5] | |
def test_bayes(): | |
BICBs = [compute_bic_prob(delta) for delta in deltas] | |
plt.plot(deltas, BICBs, ':', label='BIC') | |
Bnaives = [compute_naive(delta) for delta in deltas] | |
plt.plot(deltas, Bnaives, '-', label='1 - overlapping area') | |
alpha = 1 | |
for border in 3, 5, 1, 100: | |
Bs = [compute_bayes(delta, border=border) for delta in deltas] | |
plt.plot(deltas, Bs, 'o-', label='prior up to $%s\sigma$' % border, alpha=alpha) | |
alpha=0.1 | |
for d in deltas: | |
if int(d) != d: continue | |
s = 0.04 | |
a = scipy.stats.norm(d, s) | |
b = scipy.stats.norm(d+s*d, s) | |
x = numpy.linspace(d-3*s, d+s*d+3*s, 100) | |
ap = a.pdf(x) | |
bp = b.pdf(x) | |
plt.fill(x, ap / ap.max() * 0.1, color='grey', alpha=0.5) | |
plt.fill(x, bp / bp.max() * 0.1, color='blue', alpha=0.5) | |
plt.xlabel('distance in units of $\sigma$') | |
plt.ylabel('prob(different|D)') | |
plt.xlim(-0.5, 5.5) | |
plt.legend(loc='best') | |
plt.savefig('overlapgauss_bayes.pdf', bbox_inches='tight') | |
plt.savefig('overlapgauss_bayes.png', dpi=140, bbox_inches='tight') | |
plt.close() | |
def test_aic(): | |
import matplotlib.patches as patches | |
AICs = [compute_aic(delta) for delta in deltas] | |
plt.plot(deltas, AICs, 's-') | |
#plt.text(1, 8) | |
plt.text(0, 10, 'Are two measurements distinct?', va='center') | |
plt.text(1, 7, 'Different value', va='center') | |
plt.text(2, -1.5, 'Same value', va='center') | |
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 6, alpha=0.1, linewidth=0, color='grey')) | |
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 5, alpha=0.2, linewidth=0, color='grey')) | |
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 4, alpha=0.3, linewidth=0, color='grey')) | |
for d in deltas: | |
if int(d) != d: continue | |
s = 0.04 | |
a = scipy.stats.norm(d, s) | |
b = scipy.stats.norm(d+s*d, s) | |
x = numpy.linspace(d-3*s, d+s*d+3*s, 100) | |
ap = a.pdf(x) | |
bp = b.pdf(x) | |
plt.fill(x, ap / ap.max() * 1 - 4, color='grey', alpha=0.5) | |
plt.fill(x, bp / bp.max() * 1 - 4, color='blue', alpha=0.5) | |
plt.ylabel('Akaike Information Criterion') | |
plt.xlim(-0.5, 5.5) | |
plt.xlabel('distance in units of $\sigma$') | |
plt.savefig('overlapgauss_aic.pdf', bbox_inches='tight') | |
plt.savefig('overlapgauss_aic.png', dpi=140, bbox_inches='tight') | |
plt.close() | |
test_aic() | |
test_bayes() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment