Created
November 8, 2016 22:54
-
-
Save michaelchughes/a5b51d23d3769a9406c9a33ee187ab3d to your computer and use it in GitHub Desktop.
Example code to compute ELBO objective as function of single-doc doc-topic probas vector (theta)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import autograd.numpy as np | |
from autograd.scipy.special import gammaln, digamma | |
from autograd import grad, hessian | |
from scipy.optimize import fmin_l_bfgs_b | |
def L_theta( | |
theta_d_K=None, | |
N_d_K=None, | |
alpha_K=None, | |
y_d=None, | |
eta_K=None, | |
delta_stddev=None, | |
): | |
''' Compute ELBO objective as function of theta, with other terms constant. | |
Args | |
---- | |
N_d_K : 1D array, size K, positive reals | |
vector of counts of topic usage in current document | |
alpha_K : 1D array, size K, positive reals | |
vector of "pseudo-counts" from prior on doc-topic probas | |
y_d : None or real scalar | |
eta_K : None or 1D array of size K | |
regression weight coefficient vector | |
delta : None or positive real scalar | |
standard deviation | |
Returns | |
------- | |
L_theta : scalar real | |
Examples (unsupervised) | |
----------------------- | |
>>> K = 3 | |
>>> np.set_printoptions(precision=3, suppress=1, linewidth=80) | |
>>> N_d_K = np.asarray([100., 10., 1.]) | |
>>> alpha_K = np.asarray([1.0, 1.0, 1.0]) | |
# Compute optimal theta vector | |
>>> opt_theta_d_K = N_d_K + alpha_K | |
>>> opt_L = L_theta(opt_theta_d_K, N_d_K, alpha_K) | |
# Verify elbo scores optimal theta higher than bad theta vector | |
>>> bad_theta_d_K = np.ones(K) | |
>>> bad_L = L_theta(bad_theta_d_K, N_d_K, alpha_K) | |
>>> opt_L > bad_L | |
True | |
# Verify also better than nearly-optimal theta | |
>>> prng = np.random.RandomState(0) | |
>>> nearopt_theta_d_K = N_d_K + alpha_K + 0.1 * prng.randn(K) | |
>>> nearopt_L = L_theta(nearopt_theta_d_K, N_d_K, alpha_K) | |
>>> opt_L > nearopt_L | |
True | |
# Verify via autograd that gradient is zero at optimal point | |
>>> grad_of_L = grad(lambda theta_d_K: L_theta(theta_d_K, N_d_K, alpha_K)) | |
>>> grad_of_L(opt_theta_d_K) | |
array([ 0., 0., 0.]) | |
Examples (supervised) | |
----------------------- | |
>>> K = 3 | |
>>> y_d = 0.0 | |
>>> eta_K = np.asarray([5.0, 5.0, 5.0]) | |
>>> delta = 1.0 | |
>>> grad_of_L_super = grad( | |
... lambda theta_d_K: | |
... L_theta(theta_d_K, N_d_K, alpha_K, y_d, eta_K, delta)) | |
>>> grad_of_L_super(opt_theta_d_K).max() < 1e-8 | |
True | |
''' | |
sum_theta_d = np.sum(theta_d_K) | |
E_log_pi_d_K = digamma(theta_d_K) - digamma(sum_theta_d) | |
L_theta_unsuper = ( | |
np.inner(N_d_K + alpha_K - theta_d_K, E_log_pi_d_K) | |
- gammaln(sum_theta_d) + np.sum(gammaln(theta_d_K))) | |
if y_d is None: | |
L_theta_super = 0.0 | |
else: | |
E_pi_d_K = theta_d_K / np.sum(theta_d_K) | |
E_eta_pi = np.inner(eta_K, E_pi_d_K) | |
E_etaT_pi_piT_eta = 1.0 / (sum_theta_d + sum_theta_d**2) * ( | |
np.inner(np.square(eta_K), theta_d_K) | |
+ np.inner(eta_K, theta_d_K)**2) | |
L_theta_super = 1.0 / (delta_stddev * delta_stddev) * ( | |
y_d * E_eta_pi - 0.5 * E_etaT_pi_piT_eta) | |
return L_theta_unsuper + L_theta_super | |
def calc_theta_d_K_to_optimize_L( | |
N_d_K=None, | |
alpha_K=None, | |
y_d=None, | |
eta_K=None, | |
delta_stddev=None, | |
seed=0, | |
): | |
''' Estimate theta vector via gradient descent to optimize ELBO objective | |
Args | |
---- | |
See L_theta | |
Returns | |
------- | |
theta_d_K : 1D array, size K, positive values | |
Represents parameter vector for K-dim Dirichlet posterior | |
info_dict : dict | |
Contains gory details from optimization procedure | |
>>> K = 3 | |
>>> np.set_printoptions(precision=3, suppress=1, linewidth=80) | |
>>> N_d_K = np.asarray([100., 10., 1.]) | |
>>> alpha_K = np.asarray([1.0, 1.0, 1.0]) | |
# Compute optimal theta vector by hand | |
>>> opt_theta_d_K = N_d_K + alpha_K | |
# Compute optimal theta via numerical gradient descent | |
>>> est_theta_d_K, _ = calc_theta_d_K_to_optimize_L(N_d_K, alpha_K) | |
# Verify they are the same | |
>>> np.allclose(opt_theta_d_K, est_theta_d_K) | |
True | |
''' | |
K = alpha_K.size | |
prng = np.random.RandomState(seed) | |
# Initial guess | |
init_theta_d_K = prng.rand(K) | |
init_eta_d_K = theta2eta(init_theta_d_K) | |
# Define optimization objective in UNCONSTRAINED SPACE | |
# Minimize the negative ELBO function | |
def calc_loss_wrt_eta(eta_K): | |
return -1 * L_theta( | |
eta2theta(eta_K), | |
N_d_K, alpha_K, y_d, eta_K, delta_stddev) | |
grad_of_loss_wrt_eta = grad(calc_loss_wrt_eta) | |
# Solve UNCONSTRAINED problem | |
opt_eta_d_K, opt_f_val, info_dict = fmin_l_bfgs_b( | |
calc_loss_wrt_eta, | |
init_eta_d_K, | |
grad_of_loss_wrt_eta) | |
# Transform back to theta and return | |
return eta2theta(opt_eta_d_K), info_dict | |
def theta2eta(theta_K): | |
''' Transform positive theta vector into unconstrained eta vector | |
Args | |
---- | |
theta_K : 1D array, size K, positive real values | |
Returns | |
------- | |
eta_K : 1D array, size K, unconstrained real values | |
''' | |
return np.log(theta_K) | |
def eta2theta(eta_K): | |
''' Transform unconstrained eta vector into positive theta vector | |
Args | |
---- | |
eta_K : 1D array, size K, unconstrained real values | |
Returns | |
------- | |
theta_K : 1D array, size K, positive real values | |
''' | |
return np.exp(eta_K) | |
""" IGNORE BELOW HERE | |
def L_resp( | |
resp_K=None, | |
log_weights_K=None): | |
''' | |
>>> K = 3 | |
>>> prng = np.random.RandomState(0) | |
>>> log_weights_K = prng.randn(K) | |
>>> opt_resp_K = np.exp(log_weights_K) | |
>>> opt_resp_K /= opt_resp_K.sum() | |
>>> hess_of_L = hessian( | |
... lambda resp_K: L_resp(resp_K, log_weights_K)) | |
>>> np.all(np.linalg.eigvals(hess_of_L(opt_resp_K)) < 0) | |
True | |
>>> for i in range(100): | |
... rand_resp_K = prng.rand(K) | |
... rand_resp_K /= rand_resp_K | |
... H_KK = hess_of_L(rand_resp_K) | |
... if not np.all(np.linalg.eigvals(H_KK) < 0): | |
... print H_KK | |
... print sorted(np.linalg.eigvals(H_KK)) | |
''' | |
return np.inner(resp_K, log_weights_K) - np.inner(resp_K, np.log(resp_K)) | |
""" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment