import numpy import matplotlib.pyplot as plt import scipy.stats # two gaussian uncertainties with width sigma # at distance delta # what is the probability that they actually have the same value? def compute_bayes(delta, border=5): a = scipy.stats.norm() b = scipy.stats.norm(loc=delta) # integration space starts from the left border of the first gaussian # and ends at the right edge of the second gaussian x = numpy.linspace(-border, border + delta, 4000) # which is where the prior is defined prior = scipy.stats.uniform(loc=-border, scale=2*border + delta) # model single-value: Z1 = numpy.trapz(x=x, y=a.pdf(x) * b.pdf(x) * prior.pdf(x)) # model two-value: # the two integrals for the two values are independent int_x = numpy.trapz(x=x, y=a.pdf(x) * prior.pdf(x)) int_y = numpy.trapz(x=x, y=b.pdf(x) * prior.pdf(x)) Z2 = int_x * int_y return Z2 / (Z2 + Z1) def compute_naive(delta): a = scipy.stats.norm() b = scipy.stats.norm(loc=delta) # fraction of overlapping area # at 5 sigma, the remaining area from each is negligible border = 5 x = numpy.linspace(-border, border + delta, 4000) ap = a.pdf(x) bp = b.pdf(x) mul = numpy.min([ap, bp], axis=0) Z2 = numpy.trapz(x=x, y=mul) return 1 - Z2 def compute_aic(delta): a = scipy.stats.norm() b = scipy.stats.norm(loc=delta) loglike_1 = a.logpdf(0) + b.logpdf(delta) # best case is in the middle peak = delta / 2. loglike_2 = a.logpdf(peak) + b.logpdf(peak) deltachi2 = -2 * (loglike_2 - loglike_1) # parameters are just the location nparam2 = 2 nparam1 = 1 return deltachi2 - 2 * (nparam2 - nparam1) def compute_bic(delta): a = scipy.stats.norm() b = scipy.stats.norm(loc=delta) loglike_1 = a.logpdf(0) + b.logpdf(delta) # best case is in the middle peak = delta / 2. loglike_2 = a.logpdf(peak) + b.logpdf(peak) deltachi2 = -2 * (loglike_2 - loglike_1) # parameters are just the location nparam2 = 2 nparam1 = 1 nsample = 2 return deltachi2 - numpy.log(nsample) * (nparam2 - nparam1) def compute_bic_prob(delta): a = scipy.stats.norm() b = scipy.stats.norm(loc=delta) loglike_1 = a.logpdf(0) + b.logpdf(delta) # best case is in the middle peak = delta / 2. loglike_2 = a.logpdf(peak) + b.logpdf(peak) # parameters are just the location nparam2 = 2 nparam1 = 1 nsample = 2 bic1 = -2 * loglike_1 - numpy.log(nsample) * nparam1 bic2 = -2 * loglike_2 - numpy.log(nsample) * nparam2 Z1 = -0.5 * numpy.exp(bic1) Z2 = -0.5 * numpy.exp(bic2) return Z2 / (Z2 + Z1) deltas = [0, 1, 2, 3, 4, 5] def test_bayes(): BICBs = [compute_bic_prob(delta) for delta in deltas] plt.plot(deltas, BICBs, ':', label='BIC') Bnaives = [compute_naive(delta) for delta in deltas] plt.plot(deltas, Bnaives, '-', label='1 - overlapping area') alpha = 1 for border in 3, 5, 1, 100: Bs = [compute_bayes(delta, border=border) for delta in deltas] plt.plot(deltas, Bs, 'o-', label='prior up to $%s\sigma$' % border, alpha=alpha) alpha=0.1 for d in deltas: if int(d) != d: continue s = 0.04 a = scipy.stats.norm(d, s) b = scipy.stats.norm(d+s*d, s) x = numpy.linspace(d-3*s, d+s*d+3*s, 100) ap = a.pdf(x) bp = b.pdf(x) plt.fill(x, ap / ap.max() * 0.1, color='grey', alpha=0.5) plt.fill(x, bp / bp.max() * 0.1, color='blue', alpha=0.5) plt.xlabel('distance in units of $\sigma$') plt.ylabel('prob(different|D)') plt.xlim(-0.5, 5.5) plt.legend(loc='best') plt.savefig('overlapgauss_bayes.pdf', bbox_inches='tight') plt.savefig('overlapgauss_bayes.png', dpi=140, bbox_inches='tight') plt.close() def test_aic(): import matplotlib.patches as patches AICs = [compute_aic(delta) for delta in deltas] plt.plot(deltas, AICs, 's-') #plt.text(1, 8) plt.text(0, 10, 'Are two measurements distinct?', va='center') plt.text(1, 7, 'Different value', va='center') plt.text(2, -1.5, 'Same value', va='center') plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 6, alpha=0.1, linewidth=0, color='grey')) plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 5, alpha=0.2, linewidth=0, color='grey')) plt.gca().add_patch(patches.Rectangle((-0.5, 0), deltas[-1]+1, 4, alpha=0.3, linewidth=0, color='grey')) for d in deltas: if int(d) != d: continue s = 0.04 a = scipy.stats.norm(d, s) b = scipy.stats.norm(d+s*d, s) x = numpy.linspace(d-3*s, d+s*d+3*s, 100) ap = a.pdf(x) bp = b.pdf(x) plt.fill(x, ap / ap.max() * 1 - 4, color='grey', alpha=0.5) plt.fill(x, bp / bp.max() * 1 - 4, color='blue', alpha=0.5) plt.ylabel('Akaike Information Criterion') plt.xlim(-0.5, 5.5) plt.xlabel('distance in units of $\sigma$') plt.savefig('overlapgauss_aic.pdf', bbox_inches='tight') plt.savefig('overlapgauss_aic.png', dpi=140, bbox_inches='tight') plt.close() test_aic() test_bayes()