Log-normal vs. normal test.
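The script below repeatedly draws samples from a log-normal distribution and tracks how quickly a Bayesian observer, equipped with conjugate normal-inverse-gamma priors, comes to favor the log-normal model over the normal one. With marginal likelihoods m_N(D) and m_L(D) and prior model probabilities p_N and p_L, the quantity it computes (in log-space, via logsumexp, for numerical stability) is the posterior probability of the log-normal model:

    P(\mathrm{LN} \mid \mathcal{D}) = \frac{p_L \, m_L(\mathcal{D})}{p_L \, m_L(\mathcal{D}) + p_N \, m_N(\mathcal{D})}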
__author__ = 'Lucas Theis <lucas@theis.io>'
__license__ = 'MIT License <http://www.opensource.org/licenses/mit-license.php>'
import sys
from numpy import *
from numpy.random import *
from matplotlib.pyplot import *
from scipy.special import gammaln, logsumexp
from scipy.stats import norm, invgamma
def norm_log_ml(D, m0, v0, a0, b0):
    """
    Normal marginal log-likelihood under a normal-inverse-gamma prior.

    Reference:
    K. P. Murphy, "Conjugate Bayesian analysis of the Gaussian distribution", 2007
    """
    N = len(D)

    # parameters of the posterior normal-inverse-gamma
    vN = 1. / (N + 1. / v0)
    mN = vN * (m0 / v0 + sum(D))
    aN = a0 + N / 2.
    bN = b0 + (sum(square(D)) + m0**2 / v0 - mN**2 / vN) / 2.

    # closed-form marginal log-likelihood; the final term is the
    # (2*pi)^(-N/2) normalization of the Gaussian likelihood
    return log(vN) / 2. - log(v0) / 2. \
        + a0 * log(b0) - aN * log(bN) \
        + gammaln(aN) - gammaln(a0) \
        - N / 2. * log(2. * pi)
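For reference, the closed-form expression the function returns (Murphy, 2007), with the posterior parameters v_N, m_N, a_N, b_N defined by the updates above:

    \log p(\mathcal{D}) = \frac{1}{2}\log\frac{v_N}{v_0} + a_0 \log b_0 - a_N \log b_N + \log\Gamma(a_N) - \log\Gamma(a_0) - \frac{N}{2}\log(2\pi)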
def lognorm_log_ml(D, m0, v0, a0, b0):
    """
    Marginal log-likelihood of the log-normal distribution.
    """
    if any(D <= 0.):
        # the log-normal assigns zero density to non-positive data
        return -inf
    return norm_log_ml(log(D), m0, v0, a0, b0) - sum(log(D))
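The log-normal case reduces to the normal one through the change of variables y_i = \log x_i, whose Jacobian contributes a factor of 1/x_i per data point:

    \log p_{\mathrm{LN}}(\mathcal{D}) = \log p_{\mathrm{N}}(\{\log x_i\}) - \sum_i \log x_i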
def main(argv):
    # normal-inverse-gamma hyperparameters
    m0 = 0.    # normal mean
    v0 = 20.   # normal variance
    a0 = 1.    # inverse-gamma shape
    b0 = 100.  # inverse-gamma scale

    # prior beliefs in the normal/log-normal model
    pN = 0.9
    pL = 1 - pN

    # evaluate the normal-inverse-gamma prior density on a grid
    m, v = meshgrid(
        linspace(-80, 80, 200),
        linspace(0.1, 200, 200))
    P = norm.pdf(m, m0, scale=sqrt(v0 * v)) * invgamma.pdf(v, a0, loc=0, scale=b0)

    # visualize prior
    ion()
    figure(figsize=(6, 6))
    contour(m, v, P, colors='black')
    xlabel(r'$\mu$')
    ylabel(r'$\sigma^2$')
    title('Normal-inverse-gamma prior')
    ylim([0, 200])

    # visualize the log-normal density the data is drawn from
    figure(figsize=(6, 6))
    x = linspace(0.1, 5., 200)
    plot(x, norm.pdf(log(x), loc=0., scale=0.5) / x, 'k')
    title('Log-normal distribution')
    xlabel('$x$')
    ylabel('$p(x)$')

    post_50 = []
    post_25 = []
    post_75 = []

    num_data = [1, 2, 4, 8, 16, 32, 64, 96, 128, 160, 192, 224, 256]
    num_rep = 1000

    # test on example data
    for N in num_data:
        post = []

        for r in range(num_rep):
            # generate log-normal data
            D = exp(norm.rvs(loc=0., scale=0.5, size=N))

            # marginal log-likelihoods
            lmN = norm_log_ml(D, m0, v0, a0, b0)
            lmL = lognorm_log_ml(D, m0, v0, a0, b0)

            # posterior probability of the log-normal model
            post.append(exp(log(pL) + lmL - logsumexp([log(pL) + lmL, log(pN) + lmN])))

        post_50.append(median(post))
        post_25.append(percentile(post, 25.))
        post_75.append(percentile(post, 75.))

    # visualize beliefs
    figure(figsize=(6, 6))
    plot(num_data, post_50, 'k', linewidth=2)
    plot(num_data, post_75, 'k--')
    plot(num_data, post_25, 'k--')
    ylabel('Posterior probability of log-normal distribution')
    xlabel('Number of data points')
    legend(['Median', 'Quartiles'], loc='best')
    ylim([0, 1])

    # keep the figures open until the user presses enter
    input()

    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
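A minimal sketch of how one might check the two marginal likelihoods directly on a synthetic sample (the seed and sample size are illustrative; assumes the definitions above are in scope):

    seed(42)                                        # illustrative seed
    D = exp(norm.rvs(loc=0., scale=0.5, size=100))  # log-normal sample
    print(norm_log_ml(D, 0., 20., 1., 100.))        # should be clearly smaller
    print(lognorm_log_ml(D, 0., 20., 1., 100.))     # than this one, for most seeds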