Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Probability that two measurements actually have the same value
import numpy
import matplotlib.pyplot as plt
import scipy.stats
# two gaussian uncertainties with width sigma
# at distance delta
# what is the probability that they actually have the same value?
def compute_bayes(delta, border=5):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
# integration space starts from the left border of the first gaussian
# and ends at the right edge of the second gaussian
x = numpy.linspace(-border, border + delta, 4000)
# which is where the prior is defined
prior = scipy.stats.uniform(loc=-border, scale=2*border + delta)
# model single-value:
Z1 = numpy.trapz(x=x, y=a.pdf(x) * b.pdf(x) * prior.pdf(x))
# model two-value:
# the two integrals for the two values are independent
int_x = numpy.trapz(x=x, y=a.pdf(x) * prior.pdf(x))
int_y = numpy.trapz(x=x, y=b.pdf(x) * prior.pdf(x))
Z2 = int_x * int_y
return Z2 / (Z2 + Z1)
def compute_naive(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
# fraction of overlapping area
# at 5 sigma, the remaining area from each is negligible
border = 5
x = numpy.linspace(-border, border + delta, 4000)
ap = a.pdf(x)
bp = b.pdf(x)
mul = numpy.min([ap, bp], axis=0)
Z2 = numpy.trapz(x=x, y=mul)
return 1 - Z2
def compute_aic(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
loglike_1 = a.logpdf(0) + b.logpdf(delta)
# best case is in the middle
peak = delta / 2.
loglike_2 = a.logpdf(peak) + b.logpdf(peak)
deltachi2 = -2 * (loglike_2 - loglike_1)
# parameters are just the location
nparam2 = 2
nparam1 = 1
return deltachi2 - 2 * (nparam2 - nparam1)
def compute_bic(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
loglike_1 = a.logpdf(0) + b.logpdf(delta)
# best case is in the middle
peak = delta / 2.
loglike_2 = a.logpdf(peak) + b.logpdf(peak)
deltachi2 = -2 * (loglike_2 - loglike_1)
# parameters are just the location
nparam2 = 2
nparam1 = 1
nsample = 2
return deltachi2 - numpy.log(nsample) * (nparam2 - nparam1)
def compute_bic_prob(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
loglike_1 = a.logpdf(0) + b.logpdf(delta)
# best case is in the middle
peak = delta / 2.
loglike_2 = a.logpdf(peak) + b.logpdf(peak)
# parameters are just the location
nparam2 = 2
nparam1 = 1
nsample = 2
bic1 = -2 * loglike_1 - numpy.log(nsample) * nparam1
bic2 = -2 * loglike_2 - numpy.log(nsample) * nparam2
Z1 = -0.5 * numpy.exp(bic1)
Z2 = -0.5 * numpy.exp(bic2)
return Z2 / (Z2 + Z1)
deltas = [0, 1, 2, 3, 4, 5]
def test_bayes():
BICBs = [compute_bic_prob(delta) for delta in deltas]
plt.plot(deltas, BICBs, ':', label='BIC')
Bnaives = [compute_naive(delta) for delta in deltas]
plt.plot(deltas, Bnaives, '-', label='1 - overlapping area')
alpha = 1
for border in 3, 5, 1, 100:
Bs = [compute_bayes(delta, border=border) for delta in deltas]
plt.plot(deltas, Bs, 'o-', label='prior up to $%s\sigma$' % border, alpha=alpha)
alpha=0.1
for d in deltas:
if int(d) != d: continue
s = 0.04
a = scipy.stats.norm(d, s)
b = scipy.stats.norm(d+s*d, s)
x = numpy.linspace(d-3*s, d+s*d+3*s, 100)
ap = a.pdf(x)
bp = b.pdf(x)
plt.fill(x, ap / ap.max() * 0.1, color='grey', alpha=0.5)
plt.fill(x, bp / bp.max() * 0.1, color='blue', alpha=0.5)
plt.xlabel('distance in units of $\sigma$')
plt.ylabel('prob(different|D)')
plt.xlim(-0.5, 5.5)
plt.legend(loc='best')
plt.savefig('overlapgauss_bayes.pdf', bbox_inches='tight')
plt.savefig('overlapgauss_bayes.png', dpi=140, bbox_inches='tight')
plt.close()
def test_aic():
import matplotlib.patches as patches
AICs = [compute_aic(delta) for delta in deltas]
plt.plot(deltas, AICs, 's-')
#plt.text(1, 8)
plt.text(0, 10, 'Are two measurements distinct?', va='center')
plt.text(1, 7, 'Different value', va='center')
plt.text(2, -1.5, 'Same value', va='center')
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 6, alpha=0.1, linewidth=0, color='grey'))
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 5, alpha=0.2, linewidth=0, color='grey'))
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 4, alpha=0.3, linewidth=0, color='grey'))
for d in deltas:
if int(d) != d: continue
s = 0.04
a = scipy.stats.norm(d, s)
b = scipy.stats.norm(d+s*d, s)
x = numpy.linspace(d-3*s, d+s*d+3*s, 100)
ap = a.pdf(x)
bp = b.pdf(x)
plt.fill(x, ap / ap.max() * 1 - 4, color='grey', alpha=0.5)
plt.fill(x, bp / bp.max() * 1 - 4, color='blue', alpha=0.5)
plt.ylabel('Akaike Information Criterion')
plt.xlim(-0.5, 5.5)
plt.xlabel('distance in units of $\sigma$')
plt.savefig('overlapgauss_aic.pdf', bbox_inches='tight')
plt.savefig('overlapgauss_aic.png', dpi=140, bbox_inches='tight')
plt.close()
test_aic()
test_bayes()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.