Skip to content

Instantly share code, notes, and snippets.

@JohannesBuchner
Created June 20, 2015 19:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save JohannesBuchner/7ed8966cfde16b7f8ebe to your computer and use it in GitHub Desktop.
Save JohannesBuchner/7ed8966cfde16b7f8ebe to your computer and use it in GitHub Desktop.
Probability that two measurements actually have the same value
import numpy
import matplotlib.pyplot as plt
import scipy.stats
# two gaussian uncertainties with width sigma
# at distance delta
# what is the probability that they actually have the same value?
def compute_bayes(delta, border=5):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
# integration space starts from the left border of the first gaussian
# and ends at the right edge of the second gaussian
x = numpy.linspace(-border, border + delta, 4000)
# which is where the prior is defined
prior = scipy.stats.uniform(loc=-border, scale=2*border + delta)
# model single-value:
Z1 = numpy.trapz(x=x, y=a.pdf(x) * b.pdf(x) * prior.pdf(x))
# model two-value:
# the two integrals for the two values are independent
int_x = numpy.trapz(x=x, y=a.pdf(x) * prior.pdf(x))
int_y = numpy.trapz(x=x, y=b.pdf(x) * prior.pdf(x))
Z2 = int_x * int_y
return Z2 / (Z2 + Z1)
def compute_naive(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
# fraction of overlapping area
# at 5 sigma, the remaining area from each is negligible
border = 5
x = numpy.linspace(-border, border + delta, 4000)
ap = a.pdf(x)
bp = b.pdf(x)
mul = numpy.min([ap, bp], axis=0)
Z2 = numpy.trapz(x=x, y=mul)
return 1 - Z2
def compute_aic(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
loglike_1 = a.logpdf(0) + b.logpdf(delta)
# best case is in the middle
peak = delta / 2.
loglike_2 = a.logpdf(peak) + b.logpdf(peak)
deltachi2 = -2 * (loglike_2 - loglike_1)
# parameters are just the location
nparam2 = 2
nparam1 = 1
return deltachi2 - 2 * (nparam2 - nparam1)
def compute_bic(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
loglike_1 = a.logpdf(0) + b.logpdf(delta)
# best case is in the middle
peak = delta / 2.
loglike_2 = a.logpdf(peak) + b.logpdf(peak)
deltachi2 = -2 * (loglike_2 - loglike_1)
# parameters are just the location
nparam2 = 2
nparam1 = 1
nsample = 2
return deltachi2 - numpy.log(nsample) * (nparam2 - nparam1)
def compute_bic_prob(delta):
a = scipy.stats.norm()
b = scipy.stats.norm(loc=delta)
loglike_1 = a.logpdf(0) + b.logpdf(delta)
# best case is in the middle
peak = delta / 2.
loglike_2 = a.logpdf(peak) + b.logpdf(peak)
# parameters are just the location
nparam2 = 2
nparam1 = 1
nsample = 2
bic1 = -2 * loglike_1 - numpy.log(nsample) * nparam1
bic2 = -2 * loglike_2 - numpy.log(nsample) * nparam2
Z1 = -0.5 * numpy.exp(bic1)
Z2 = -0.5 * numpy.exp(bic2)
return Z2 / (Z2 + Z1)
deltas = [0, 1, 2, 3, 4, 5]
def test_bayes():
BICBs = [compute_bic_prob(delta) for delta in deltas]
plt.plot(deltas, BICBs, ':', label='BIC')
Bnaives = [compute_naive(delta) for delta in deltas]
plt.plot(deltas, Bnaives, '-', label='1 - overlapping area')
alpha = 1
for border in 3, 5, 1, 100:
Bs = [compute_bayes(delta, border=border) for delta in deltas]
plt.plot(deltas, Bs, 'o-', label='prior up to $%s\sigma$' % border, alpha=alpha)
alpha=0.1
for d in deltas:
if int(d) != d: continue
s = 0.04
a = scipy.stats.norm(d, s)
b = scipy.stats.norm(d+s*d, s)
x = numpy.linspace(d-3*s, d+s*d+3*s, 100)
ap = a.pdf(x)
bp = b.pdf(x)
plt.fill(x, ap / ap.max() * 0.1, color='grey', alpha=0.5)
plt.fill(x, bp / bp.max() * 0.1, color='blue', alpha=0.5)
plt.xlabel('distance in units of $\sigma$')
plt.ylabel('prob(different|D)')
plt.xlim(-0.5, 5.5)
plt.legend(loc='best')
plt.savefig('overlapgauss_bayes.pdf', bbox_inches='tight')
plt.savefig('overlapgauss_bayes.png', dpi=140, bbox_inches='tight')
plt.close()
def test_aic():
import matplotlib.patches as patches
AICs = [compute_aic(delta) for delta in deltas]
plt.plot(deltas, AICs, 's-')
#plt.text(1, 8)
plt.text(0, 10, 'Are two measurements distinct?', va='center')
plt.text(1, 7, 'Different value', va='center')
plt.text(2, -1.5, 'Same value', va='center')
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 6, alpha=0.1, linewidth=0, color='grey'))
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 5, alpha=0.2, linewidth=0, color='grey'))
plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 4, alpha=0.3, linewidth=0, color='grey'))
for d in deltas:
if int(d) != d: continue
s = 0.04
a = scipy.stats.norm(d, s)
b = scipy.stats.norm(d+s*d, s)
x = numpy.linspace(d-3*s, d+s*d+3*s, 100)
ap = a.pdf(x)
bp = b.pdf(x)
plt.fill(x, ap / ap.max() * 1 - 4, color='grey', alpha=0.5)
plt.fill(x, bp / bp.max() * 1 - 4, color='blue', alpha=0.5)
plt.ylabel('Akaike Information Criterion')
plt.xlim(-0.5, 5.5)
plt.xlabel('distance in units of $\sigma$')
plt.savefig('overlapgauss_aic.pdf', bbox_inches='tight')
plt.savefig('overlapgauss_aic.png', dpi=140, bbox_inches='tight')
plt.close()
test_aic()
test_bayes()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment