@jaanli
Created February 11, 2017 17:32
Black box variational inference for Bayesian linear regression. NumPy and SciPy only.
"""Use black-box variational inference (https://arxiv.org/abs/1401.0118) to
fit Bayesian linear regression (https://en.wikipedia.org/wiki/Bayesian_linear_regression)
and ensure it gets the analytic posterior mean from wikipedia.
"""
import numpy as np
import scipy.stats
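
# Gradient estimator used below: the score-function estimator from the
# black-box variational inference paper linked above,
#   grad_lambda ELBO = E_q[ grad_lambda log q(w; lambda)
#                           * (log p(y, w | x) - log q(w; lambda)) ],
# approximated with Monte Carlo samples w ~ q(w; lambda). The functions that
# follow implement each piece of this estimator.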


def generate_data(cfg):
    """Synthetic data:
    y ~ normal(w^T x, 0.1)
    """
    np.random.seed(0)
    X = np.random.uniform(-1., 1., size=(cfg['batch_size'], cfg['n_features']))
    w = np.array(cfg['data/init_weights'])
    noise = scipy.stats.norm.rvs(loc=0, scale=0.1, size=cfg['batch_size'])
    y = np.dot(X, w) + noise
    # Conjugate posterior mean for the model fit below (which assumes unit noise
    # variance) with a normal(0, w_sigma^2 I) prior on the weights:
    #   (X^T X + I / w_sigma^2)^{-1} X^T y
    prior_precision = 1. / np.square(cfg['p/w_sigma'])
    analytic_posterior_mean = np.dot(
        np.linalg.inv(np.dot(X.T, X) + prior_precision * np.eye(cfg['n_features'])),
        np.dot(X.T, y))
    return X, y, analytic_posterior_mean


def log_p(y, w, x):
    """Model:
    w ~ normal(0, 1)
    y ~ normal(w^T x, 1)
    """
    # Shapes: w is (n_samples, n_features), x is (batch_size, n_features),
    # y is (batch_size,).
    log_lik = scipy.stats.norm.logpdf(y, np.dot(w, x.T), 1.)  # (n_samples, batch_size)
    log_prior = scipy.stats.norm.logpdf(w, 0., 1.)  # (n_samples, n_features)
    # Sum over data points and weight dimensions -> one log joint per sample.
    return np.sum(log_lik, axis=-1) + np.sum(log_prior, axis=-1)
    # Alternative that keeps per-datapoint log-likelihoods:
    # return log_lik + np.expand_dims(np.sum(log_prior, axis=-1), -1)


def log_q(w, mu, sigma):
    """approximate posterior:
    q(w) ~ normal(mu, sigma)
    """
    return np.sum(scipy.stats.norm.logpdf(w, mu, sigma), axis=-1)


def softplus(x):
    return np.log(1. + np.exp(x))


def inv_softplus(x):
    return np.log(np.exp(x) - 1.)


def grad_softplus(x):
    return 1. / (1. + np.exp(x)) * np.exp(x)
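

# Note on numerical stability (not an issue for the small values used here,
# but worth flagging): np.exp can overflow for large positive x in softplus
# and grad_softplus, and inv_softplus loses precision for small x. A stabler
# softplus, if needed, is np.logaddexp(0., x); np.expm1 helps in inv_softplus.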


def grad_log_q(z, mu, sigma_arg):
    """Score function of q = normal(mu, softplus(sigma_arg)), up to constants:
    log_q = -log sp(sigma_arg) - (z - mu)^2 / (2 * sp(sigma_arg)^2)
    grad_mu log_q = (z - mu) / sp(sigma_arg)^2
    grad_sigma_arg log_q = (-1 / sp(sigma_arg)
                            + (z - mu)^2 / sp(sigma_arg)^3) * grad_sp(sigma_arg)
    """
    diff = z - mu
    sigma = softplus(sigma_arg)
    grad_mu = diff / np.square(sigma)
    grad_sigma_arg = ((-1. / sigma + np.square(diff) / np.power(sigma, 3.))
                      * grad_softplus(sigma_arg))
    return grad_mu, grad_sigma_arg
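

# Optional sanity check, not part of the original fitting loop: compare the
# hand-coded score above against central finite differences of the normal
# log-density for a single scalar weight. The test points below are arbitrary.
def check_grad_log_q(z=0.3, mu=0.1, sigma_arg=0.5, h=1e-6):
    f = lambda m, s: scipy.stats.norm.logpdf(z, m, softplus(s))
    numeric_mu = (f(mu + h, sigma_arg) - f(mu - h, sigma_arg)) / (2. * h)
    numeric_sigma_arg = (f(mu, sigma_arg + h) - f(mu, sigma_arg - h)) / (2. * h)
    analytic_mu, analytic_sigma_arg = grad_log_q(z, mu, sigma_arg)
    return (numeric_mu, analytic_mu), (numeric_sigma_arg, analytic_sigma_arg)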


def adam(param, grad, t, beta1=0.95, beta2=0.999, epsilon=1e-8, alpha=1e-3):
    """Adam update; returns the step to add to `param` and the updated state.

    Note: epsilon is added inside the square root, a minor variant of the
    update in the Adam paper.
    """
    if 'm_t' not in grad:
        grad['m_t'] = 0.
        grad['v_t'] = 0.
    t = t + 1
    grad['m_t'] = beta1 * grad['m_t'] + (1. - beta1) * grad['g_t']
    grad['v_t'] = beta2 * grad['v_t'] + (1. - beta2) * np.square(grad['g_t'])
    m_t_hat = grad['m_t'] / (1. - np.power(beta1, float(t)))
    v_t_hat = grad['v_t'] / (1. - np.power(beta2, float(t)))
    return alpha * m_t_hat / (np.sqrt(v_t_hat + epsilon)), grad


def get_config():
    return {'n_features': 1,
            'batch_size': 16,
            'p/w_sigma': 1.,
            'q/init_weights': 0.,
            'q/init_std': 1.,
            'q/n_samples': 10,
            'optim/n_iterations': int(1e5),
            'optim/name': 'adam',
            'optim/learning_rate': 1e-3,
            'data/init_weights': [3.],
            'print_every': 1000}


def main():
    cfg = get_config()
    D = cfg['n_features']
    np_x, np_y, analytic_posterior_mean = generate_data(cfg)
    # Variational parameters: mean and (pre-softplus) scale of q(w).
    q_mu = np.ones(D) * cfg['q/init_weights']
    q_sigma_arg = np.ones(D) * inv_softplus(cfg['q/init_std'])
    grads = {'mu': {}, 'sigma_arg': {}}
    lr = cfg['optim/learning_rate']
    for i in range(1, cfg['optim/n_iterations']):
        # Sample weights from q(w) = normal(q_mu, q_sigma).
        eps = np.random.randn(cfg['q/n_samples'], D)
        q_sigma = softplus(q_sigma_arg)
        q_w_samples = q_mu + q_sigma * eps
        # Per-sample ELBO terms: log p(y, w | x) - log q(w).
        elbo = log_p(np_y, q_w_samples, np_x) - log_q(q_w_samples, q_mu, q_sigma)
        if i % cfg['print_every'] == 0:
            print('Iteration %d ELBO: %.3f' % (i, np.mean(elbo)))
            print('Inferred mu, sigma: %s %s' % (q_mu, q_sigma))
            print('Analytic posterior mean for weights: %s' % analytic_posterior_mean)
        grad_mu, grad_sigma_arg = grad_log_q(q_w_samples, q_mu, q_sigma_arg)
        # Score-function gradient estimate: average grad log q * ELBO over
        # samples, keeping one entry per weight dimension.
        score = lambda grad_q: np.mean(grad_q * elbo[:, np.newaxis], axis=0)
        if cfg['optim/name'] == 'sgd':
            q_mu += lr * score(grad_mu)
            q_sigma_arg += lr * score(grad_sigma_arg)
        elif cfg['optim/name'] == 'adam':
            grads['mu']['g_t'] = score(grad_mu)
            grads['sigma_arg']['g_t'] = score(grad_sigma_arg)
            q_mu_update, grads['mu'] = adam(q_mu, grads['mu'], i, alpha=lr)
            q_sigma_arg_update, grads['sigma_arg'] = adam(
                q_sigma_arg, grads['sigma_arg'], i, alpha=lr)
            q_mu += q_mu_update
            q_sigma_arg += q_sigma_arg_update


if __name__ == '__main__':
    main()
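

# Rough expected behavior with the default config (not a guarantee, since the
# gradient estimate is stochastic): running this script prints progress every
# cfg['print_every'] iterations, and the inferred mu should drift toward the
# analytic posterior mean, which is the check described in the module docstring.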