Create a gist now

Instantly share code, notes, and snippets.

Issue with multivariate Polya distribution estimation
import numpy as np
from scipy.special import gammaln
def log_multivariate_polya(X, alpha):
N = X.sum()
A = alpha.sum()
log_likelihood = gammaln(N+1) - gammaln(X+1).sum()
log_likelihood += gammaln(A) - gammaln(alpha).sum()
log_likelihood += gammaln(X + alpha).sum() - gammaln(N + A)
return log_likelihood
def logmean(loga):
return reduce(np.logaddexp, loga) - np.log(loga.size)
def log_multivariate_polya_montecarlo(X, alpha, iterations=1e5):
Theta = np.random.dirichlet(alpha, size=int(iterations))
logps = gammaln(X.sum() + 1) - gammaln(X + 1).sum()
logps += (X * np.log(Theta)).sum(1)
return logmean(logps)
if __name__ == '__main__':
np.random.seed(0)
X = np.array([0,50,50])
alpha = np.array([1,10,10])
print "X:", X
print "alpha:", alpha
print "analytic:", log_multivariate_polya(X, alpha)
print "montecarlo", log_multivariate_polya_montecarlo(X, alpha)
print
X = np.array([100,0,0])
alpha = np.array([1,10,10])
print "X:", X
print "alpha:", alpha
print "analytic:", log_multivariate_polya(X, alpha)
print "montecarlo", log_multivariate_polya_montecarlo(X, alpha)
@emanuele
Owner

This is the output if you run this code:


X: [ 0 50 50]
alpha: [ 1 10 10]
analytic: -5.22892710577
montecarlo -5.23470053651

X: [100 0 0]
alpha: [ 1 10 10]
analytic: -51.737395965
montecarlo -93.5266543113


Why there is such a big disagreement between the two implementations in the second case?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment