Last active
December 20, 2015 07:39
-
-
Save lucastheis/6094631 to your computer and use it in GitHub Desktop.
Log-normal vs. normal test.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
__author__ = 'Lucas Theis <lucas@theis.io>'
__license__ = 'MIT License <http://www.opensource.org/licenses/mit-license.php>'

import sys
from numpy import *
from numpy.random import *
from matplotlib.pyplot import *
# scipy.maxentropy was removed from SciPy (0.11); logsumexp lives in scipy.special
from scipy.special import gammaln, logsumexp
from scipy.stats import norm, invgamma
def norm_log_ml(D, m0, v0, a0, b0):
	"""
	Normal marginal log-likelihood under a normal-inverse-gamma prior.

	Model: x_i ~ N(mu, s2) with mu | s2 ~ N(m0, s2 * v0) and s2 ~ IG(a0, b0);
	both mu and s2 are integrated out analytically.

	Parameters:
		D:  1d array of observations (empty D yields 0., i.e. p(D) = 1)
		m0: prior mean of mu
		v0: prior variance of mu, as a multiple of s2
		a0: inverse-gamma shape
		b0: inverse-gamma scale

	Returns the log marginal likelihood, log p(D).

	Reference:
	K. P. Murphy, Conjugate Bayesian analysis of the Gaussian distribution, 2007
	"""

	N = len(D)

	# parameters of posterior normal-inverse-gamma
	vN = 1. / (N + 1. / v0)
	mN = vN * (m0 / v0 + sum(D))
	aN = a0 + N / 2.
	bN = b0 + (sum(square(D)) + m0**2 / v0 - mN**2 / vN) / 2.

	# log p(D) = log[ (2 pi)^(-N/2) sqrt(vN/v0) b0^a0 / bN^aN Gamma(aN) / Gamma(a0) ]
	# FIX: the normalizer is (2 pi)^(-N/2); the previous version used
	# -N/2*log(pi) - N*log(2), which is off by a constant -N/2*log(2)
	# (the error cancelled in model comparisons, but the returned value was wrong)
	return log(vN) / 2. - log(v0) / 2. \
		+ a0 * log(b0) - aN * log(bN) \
		+ gammaln(aN) - gammaln(a0) \
		- N / 2. * log(2. * pi)
def lognorm_log_ml(D, m0, v0, a0, b0):
	"""
	Marginal log-likelihood of the log-normal distribution.

	Computed via change of variables: if x is log-normal, then log(x) is
	normal, so p(D) equals the normal marginal likelihood of log(D) times
	the Jacobian factor prod_i 1/x_i. Any data point outside the support
	(x <= 0) makes the likelihood zero, hence -inf on the log scale.
	"""

	# zero likelihood outside the log-normal support
	if any(D <= 0.):
		return -inf

	log_D = log(D)

	# normal evidence of the log-data plus the log-Jacobian of x -> log(x)
	return norm_log_ml(log_D, m0, v0, a0, b0) - sum(log_D)
def main(argv):
	"""
	Compare Bayesian evidence for the normal vs. log-normal model on
	log-normally distributed data, and plot how the posterior belief in
	the log-normal model grows with the number of data points.
	"""

	# normal-inverse-gamma hyperparameters
	m0 = 0.    # normal mean
	v0 = 20.   # normal variance
	a0 = 1.    # inverse-gamma shape
	b0 = 100.  # inverse-gamma scale

	# prior beliefs in normal/log-normal distribution
	pN = 0.9
	pL = 1 - pN

	# normal-inverse-gamma density evaluated on a grid (for visualization only)
	m, v = meshgrid(
		linspace(-80, 80, 200),
		linspace(0.1, 200, 200))
	P = norm.pdf(m, m0, scale=sqrt(v0 * v)) * invgamma.pdf(v, a0, loc=0, scale=b0)

	# visualize prior
	ion()
	figure(figsize=(6, 6))
	contour(m, v, P, colors='black')
	# raw strings: '\m' and '\s' are invalid escape sequences in plain literals
	xlabel(r'$\mu$')
	ylabel(r'$\sigma^2$')
	title('Normal-inverse-gamma prior')
	ylim([0, 200])

	# visualize the data-generating distribution
	figure(figsize=(6, 6))
	x = linspace(0.1, 5., 200)
	plot(x, norm.pdf(log(x), loc=0., scale=0.5) / x, 'k')
	title('Log-normal distribution')
	xlabel('$x$')
	ylabel('$p(x)$')

	# quartiles of the posterior probability, per dataset size
	post_50 = []
	post_25 = []
	post_75 = []

	num_data = [1, 2, 4, 8, 16, 32, 64, 96, 128, 160, 192, 224, 256]
	num_rep = 1000

	# test on example data
	for N in num_data:
		post = []

		for r in range(num_rep):
			# generate log-normal data
			D = exp(norm.rvs(loc=0., scale=0.5, size=N))

			# marginal log-likelihoods
			lmN = norm_log_ml(D, m0, v0, a0, b0)
			lmL = lognorm_log_ml(D, m0, v0, a0, b0)

			# posterior probability of log-normal model, computed in the
			# log-domain for numerical stability
			post.append(exp(log(pL) + lmL - logsumexp([log(pL) + lmL, log(pN) + lmN])))

		post_50.append(median(post))
		post_25.append(percentile(post, 25.))
		post_75.append(percentile(post, 75.))

	# visualize beliefs
	figure(figsize=(6, 6))
	plot(num_data, post_50, 'k', linewidth=2)
	plot(num_data, post_75, 'k--')
	plot(num_data, post_25, 'k--')
	ylabel('Posterior probability of log-normal distribution')
	xlabel('Number of data points')
	legend(['Median', 'Quartiles'], loc='best')
	ylim([0, 1])

	# keep interactive figures open until the user presses enter
	try:
		raw_input()
	except NameError:
		# Python 3: raw_input was renamed to input
		input()

	return 0
# script entry point; exit status propagates main's return value to the shell
if __name__ == '__main__':
	sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment