Example code to compute the ELBO objective as a function of a single document's doc-topic probability vector (theta)
import autograd.numpy as np
from autograd.scipy.special import gammaln, digamma
from autograd import grad, hessian
from scipy.optimize import fmin_l_bfgs_b

def L_theta(
        theta_d_K=None,
        N_d_K=None,
        alpha_K=None,
        y_d=None,
        eta_K=None,
        delta_stddev=None,
        ):
    ''' Compute ELBO objective as function of theta, with other terms constant.

    Args
    ----
    N_d_K : 1D array, size K, positive reals
        vector of counts of topic usage in current document
    alpha_K : 1D array, size K, positive reals
        vector of "pseudo-counts" from prior on doc-topic probas
    y_d : None or real scalar
        observed regression target for current document (None if unsupervised)
    eta_K : None or 1D array of size K
        regression weight coefficient vector
    delta_stddev : None or positive real scalar
        standard deviation of the Gaussian regression likelihood

    Returns
    -------
    L_theta : scalar real

    Examples (unsupervised)
    -----------------------
    >>> K = 3
    >>> np.set_printoptions(precision=3, suppress=1, linewidth=80)
    >>> N_d_K = np.asarray([100., 10., 1.])
    >>> alpha_K = np.asarray([1.0, 1.0, 1.0])

    # Compute optimal theta vector
    >>> opt_theta_d_K = N_d_K + alpha_K
    >>> opt_L = L_theta(opt_theta_d_K, N_d_K, alpha_K)

    # Verify ELBO scores optimal theta higher than a bad theta vector
    >>> bad_theta_d_K = np.ones(K)
    >>> bad_L = L_theta(bad_theta_d_K, N_d_K, alpha_K)
    >>> opt_L > bad_L
    True

    # Verify also better than a nearly-optimal theta
    >>> prng = np.random.RandomState(0)
    >>> nearopt_theta_d_K = N_d_K + alpha_K + 0.1 * prng.randn(K)
    >>> nearopt_L = L_theta(nearopt_theta_d_K, N_d_K, alpha_K)
    >>> opt_L > nearopt_L
    True

    # Verify via autograd that gradient is zero at optimal point
    >>> grad_of_L = grad(lambda theta_d_K: L_theta(theta_d_K, N_d_K, alpha_K))
    >>> grad_of_L(opt_theta_d_K)
    array([ 0., 0., 0.])

    Examples (supervised)
    ---------------------
    >>> K = 3
    >>> y_d = 0.0
    >>> eta_K = np.asarray([5.0, 5.0, 5.0])
    >>> delta = 1.0
    >>> grad_of_L_super = grad(
    ...     lambda theta_d_K:
    ...         L_theta(theta_d_K, N_d_K, alpha_K, y_d, eta_K, delta))
    >>> grad_of_L_super(opt_theta_d_K).max() < 1e-8
    True
    '''
    sum_theta_d = np.sum(theta_d_K)
    # E[log pi_dk] under the Dirichlet with parameter vector theta_d_K
    E_log_pi_d_K = digamma(theta_d_K) - digamma(sum_theta_d)
    L_theta_unsuper = (
        np.inner(N_d_K + alpha_K - theta_d_K, E_log_pi_d_K)
        - gammaln(sum_theta_d) + np.sum(gammaln(theta_d_K)))
    if y_d is None:
        L_theta_super = 0.0
    else:
        # Expected regression terms E[eta^T pi_d] and E[(eta^T pi_d)^2]
        # under the Dirichlet with parameter vector theta_d_K
        E_pi_d_K = theta_d_K / np.sum(theta_d_K)
        E_eta_pi = np.inner(eta_K, E_pi_d_K)
        E_etaT_pi_piT_eta = 1.0 / (sum_theta_d + sum_theta_d**2) * (
            np.inner(np.square(eta_K), theta_d_K)
            + np.inner(eta_K, theta_d_K)**2)
        # Gaussian likelihood contribution for observed response y_d
        L_theta_super = 1.0 / (delta_stddev * delta_stddev) * (
            y_d * E_eta_pi - 0.5 * E_etaT_pi_piT_eta)
    return L_theta_unsuper + L_theta_super
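

# --- Added sketch (not part of the original gist) ---------------------------
# Why the unsupervised optimum is theta_d_K = N_d_K + alpha_K: differentiating
# the unsupervised part of L_theta by hand gives, with S = sum(theta_d_K),
#     dL/dtheta_k = (N_k + alpha_k - theta_k) * trigamma(theta_k)
#                   - trigamma(S) * sum_j (N_j + alpha_j - theta_j),
# which vanishes exactly at theta_d_K = N_d_K + alpha_K. The helper below is a
# hypothetical illustration of that identity (its name is an assumption, not
# part of the original API); it should agree with autograd's gradient of
# L_theta whenever y_d is None.
def _manual_grad_L_theta_unsuper(theta_d_K, N_d_K, alpha_K):
    from scipy.special import polygamma
    resid_K = N_d_K + alpha_K - theta_d_K
    return (resid_K * polygamma(1, theta_d_K)
            - polygamma(1, np.sum(theta_d_K)) * np.sum(resid_K))
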

def calc_theta_d_K_to_optimize_L(
        N_d_K=None,
        alpha_K=None,
        y_d=None,
        eta_K=None,
        delta_stddev=None,
        seed=0,
        ):
    ''' Estimate theta vector that maximizes the ELBO objective L_theta.

    Optimization uses L-BFGS on an unconstrained reparameterization of theta.

    Args
    ----
    See L_theta

    Returns
    -------
    theta_d_K : 1D array, size K, positive values
        Represents parameter vector for K-dim Dirichlet posterior
    info_dict : dict
        Contains gory details from optimization procedure

    Examples
    --------
    >>> K = 3
    >>> np.set_printoptions(precision=3, suppress=1, linewidth=80)
    >>> N_d_K = np.asarray([100., 10., 1.])
    >>> alpha_K = np.asarray([1.0, 1.0, 1.0])

    # Compute optimal theta vector by hand
    >>> opt_theta_d_K = N_d_K + alpha_K

    # Compute optimal theta via numerical optimization
    >>> est_theta_d_K, _ = calc_theta_d_K_to_optimize_L(N_d_K, alpha_K)

    # Verify they are the same
    >>> np.allclose(opt_theta_d_K, est_theta_d_K)
    True
    '''
    K = alpha_K.size
    prng = np.random.RandomState(seed)

    # Initial guess
    init_theta_d_K = prng.rand(K)
    init_eta_d_K = theta2eta(init_theta_d_K)

    # Define optimization objective in UNCONSTRAINED SPACE
    # Minimize the negative ELBO function
    def calc_loss_wrt_eta(eta_d_K):
        # NOTE: eta_d_K is the unconstrained reparameterization of theta_d_K;
        # it must not shadow the regression weights eta_K from the outer scope.
        return -1 * L_theta(
            eta2theta(eta_d_K),
            N_d_K, alpha_K, y_d, eta_K, delta_stddev)

    grad_of_loss_wrt_eta = grad(calc_loss_wrt_eta)

    # Solve UNCONSTRAINED problem with L-BFGS
    opt_eta_d_K, opt_f_val, info_dict = fmin_l_bfgs_b(
        calc_loss_wrt_eta,
        init_eta_d_K,
        grad_of_loss_wrt_eta)

    # Transform back to theta and return
    return eta2theta(opt_eta_d_K), info_dict
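

# --- Added sketch (not part of the original gist) ---------------------------
# Possible usage of the supervised path; the values below are illustrative
# assumptions, not reference output. With all-equal regression weights the
# supervised term is constant in theta, so the optimizer should still land
# near the unsupervised optimum N_d_K + alpha_K.
def _demo_supervised_fit():
    N_d_K = np.asarray([100., 10., 1.])
    alpha_K = np.asarray([1.0, 1.0, 1.0])
    theta_d_K, info_dict = calc_theta_d_K_to_optimize_L(
        N_d_K, alpha_K,
        y_d=0.0,
        eta_K=np.asarray([5.0, 5.0, 5.0]),
        delta_stddev=1.0)
    return theta_d_K, info_dict
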

def theta2eta(theta_K):
    ''' Transform positive theta vector into unconstrained eta vector

    Args
    ----
    theta_K : 1D array, size K, positive real values

    Returns
    -------
    eta_K : 1D array, size K, unconstrained real values
    '''
    return np.log(theta_K)

def eta2theta(eta_K):
    ''' Transform unconstrained eta vector into positive theta vector

    Args
    ----
    eta_K : 1D array, size K, unconstrained real values

    Returns
    -------
    theta_K : 1D array, size K, positive real values
    '''
    return np.exp(eta_K)
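

# --- Added sketch (not part of the original gist) ---------------------------
# theta2eta and eta2theta are exact inverses (log and exp), so a round trip
# should recover any strictly positive input up to floating-point error.
# The helper name below is an assumption for illustration only.
def _check_theta_eta_round_trip(K=5, seed=0):
    prng = np.random.RandomState(seed)
    theta_K = prng.rand(K) + 0.1  # strictly positive entries
    assert np.allclose(theta_K, eta2theta(theta2eta(theta_K)))
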
""" IGNORE BELOW HERE

def L_resp(
        resp_K=None,
        log_weights_K=None):
    '''
    >>> K = 3
    >>> prng = np.random.RandomState(0)
    >>> log_weights_K = prng.randn(K)
    >>> opt_resp_K = np.exp(log_weights_K)
    >>> opt_resp_K /= opt_resp_K.sum()
    >>> hess_of_L = hessian(
    ...     lambda resp_K: L_resp(resp_K, log_weights_K))
    >>> np.all(np.linalg.eigvals(hess_of_L(opt_resp_K)) < 0)
    True
    >>> for i in range(100):
    ...     rand_resp_K = prng.rand(K)
    ...     rand_resp_K /= rand_resp_K.sum()
    ...     H_KK = hess_of_L(rand_resp_K)
    ...     if not np.all(np.linalg.eigvals(H_KK) < 0):
    ...         print(H_KK)
    ...         print(sorted(np.linalg.eigvals(H_KK)))
    '''
    return np.inner(resp_K, log_weights_K) - np.inner(resp_K, np.log(resp_K))
"""
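
# --- Added for convenience (not part of the original gist) ------------------
# Optional entry point to exercise the doctest examples in the docstrings
# above, assuming autograd and scipy are installed.
if __name__ == '__main__':
    import doctest
    doctest.testmod()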