""" Some description. | |
""" | |
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import sys | |
import json | |
import tqdm | |
import numpy as np | |
import tensorflow as tf | |
import edward as ed | |

N_FEATURES = 2
DATA_LENGTH = 3
REAL_WEIGHT = 0.7
REAL_BIAS = 2.5
REAL_DATA = np.array([2., 4., 6.])
REAL_LABELS = REAL_WEIGHT * REAL_DATA + REAL_BIAS
MC_SAMPLES = 100


def create_fake_data():
    data = np.random.randint(0, 10, DATA_LENGTH).astype(np.float32)
    noise = np.random.randn(DATA_LENGTH).astype(np.float32)
    labels = REAL_WEIGHT * data + REAL_BIAS + noise
    return data, labels


class Model:
    def __init__(self, model_name, dropout_keep_prob=1.0):
        self.model_name = model_name
        self.dropout_keep_prob = dropout_keep_prob

        self.x = tf.placeholder(tf.float32, (DATA_LENGTH,))
        self.y = tf.placeholder(tf.float32, (DATA_LENGTH,))

        if self.model_name == "simple_linear":
            # -- set parameters
            self.bias = tf.get_variable("bias", [1])
            self.weight = tf.get_variable("weight", [1])  # should converge to REAL_WEIGHT
            # -- set dropout (optional)
            self.add_dropout()
            # -- set model
            self.nn = self.weight * self.x + self.bias
            # -- set loss
            self.loss = tf.reduce_mean((self.y - self.nn) ** 2)
        elif self.model_name == "bayesian_simple_linear":
            # -- set priors: w ~ Normal(0, 1), b ~ Normal(0, 1)
            # (note: newer Edward releases use loc/scale instead of mu/sigma)
            self.weight_mu = tf.zeros(1)  # tf.get_variable("weight_mu", [1])
            self.weight_sigma = tf.ones(1)  # fixed hyperparameters
            self.weight = ed.models.Normal(mu=self.weight_mu, sigma=self.weight_sigma)
            self.bias_mu = tf.zeros(1)  # tf.get_variable("bias_mu", [1])
            self.bias_sigma = tf.ones(1)  # fixed hyperparameters
            self.bias = ed.models.Normal(mu=self.bias_mu, sigma=self.bias_sigma)
            # -- set model (likelihood): y | x, w, b ~ Normal(w * x + b, 1)
            self.nn_mean = self.weight * self.x + self.bias
            self.nn_sigma = tf.ones(1)  # fixed hyperparameters
            self.nn = ed.models.Normal(mu=self.nn_mean, sigma=self.nn_sigma)
            # -- set variational parameters: q(w) and q(b) are Normals whose
            # means are free variables and whose scales pass through softplus
            # to stay positive
            self.qweight = ed.models.Normal(
                mu=tf.get_variable("qweight_mu", initializer=tf.random_normal([1])),
                sigma=tf.nn.softplus(tf.get_variable("qweight_sigma", initializer=tf.random_normal([1]))))
            self.qbias = ed.models.Normal(
                mu=tf.get_variable("qbias_mu", initializer=tf.random_normal([1])),
                sigma=tf.nn.softplus(tf.get_variable("qbias_sigma", initializer=tf.random_normal([1]))))
            # -- inference
            self.latent_vars = {self.weight: self.qweight, self.bias: self.qbias}
            self.data = {self.nn: self.y}
            # not a tensor: the (latent_vars, data) tuple tells optimize()
            # and main() to run variational inference instead
            self.loss = (self.latent_vars, self.data)
        else:
            raise ValueError("Unknown model name: %s" % model_name)
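
    # Dropout here is applied to the parameters themselves rather than to
    # activations. Following Gal & Ghahramani (2016), keeping dropout
    # switched on at prediction time and averaging many stochastic forward
    # passes ("MC dropout") approximates Bayesian model averaging, which is
    # how main() derives uncertainty estimates for the simple_linear model.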
    def add_dropout(self):
        self._keep_prob = tf.Variable(name="keep_prob", initial_value=self.dropout_keep_prob, trainable=False)
        self.bias = tf.cond(
            self._keep_prob < 1.0, lambda: tf.nn.dropout(self.bias, keep_prob=self._keep_prob), lambda: self.bias)
        self.weight = tf.cond(
            self._keep_prob < 1.0, lambda: tf.nn.dropout(self.weight, keep_prob=self._keep_prob), lambda: self.weight)

    @property
    def keep_prob(self):
        return self._keep_prob

    def optimize(self):
        if _is_loss_function(self.loss):
            # loss optimization
            self.optimizer = tf.train.GradientDescentOptimizer(.005)
            self.train_op = self.optimizer.minimize(self.loss)
        else:
            # variational inference
            latent_vars, data = self.loss
            self.inference = ed.KLqp(latent_vars=latent_vars, data=data)
            self.inference.initialize()
            self.train_op = self.inference
        return self.train_op
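

# For reference, ed.KLqp fits q by minimizing KL(q(w, b) || p(w, b | x, y)),
# i.e. maximizing the evidence lower bound (ELBO)
#     E_q[log p(y | x, w, b) + log p(w) + log p(b)] - E_q[log q(w) + log q(b)],
# with reparameterization-gradient estimates of the expectations where the
# variational family allows it.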


def _is_loss_function(loss):
    return isinstance(loss, tf.Tensor)


def _section(text):
    print("-" * 10 + " ", text.upper())


def _tau_inv(keep_prob, N, l2=0.005, lambda_=0.00001):
    # -- variational dropout uncertainty interval by Gal:
    # tau = p * l^2 / (2 * N * lambda), where p is the keep probability,
    # l2 the squared length-scale, and lambda_ the weight decay; tau^-1 is
    # added to the Monte Carlo variance as the model precision term.
    # https://github.com/yaringal/DropoutUncertaintyDemos/blob/master/convnetjs/regression_uncertainty.js
    tau = keep_prob * l2 / (2. * N * lambda_)
    return 1. / tau
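

# A minimal sketch (not called anywhere in this script) of how the Monte
# Carlo dropout samples could be turned into an approximate 95% predictive
# interval using _tau_inv above; `preds` is assumed to be the
# (MC_SAMPLES, DATA_LENGTH) array that main() collects as nn_mc.
def _predictive_interval(preds, keep_prob, n):
    mean = np.mean(preds, axis=0)
    # total predictive variance = variance across MC passes
    # plus the inverse model precision tau^-1
    var = np.var(preds, axis=0) + _tau_inv(keep_prob, n)
    half_width = 2. * np.sqrt(var)  # ~95% under a normal approximation
    return mean - half_width, mean + half_width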


def main(model_name, dropout_keep_prob=1.0):
    _section("set model")
    model = Model(model_name, dropout_keep_prob)
    train_op = model.optimize()

    local_init_op = tf.local_variables_initializer()
    global_init_op = tf.global_variables_initializer()
    tvars = tf.trainable_variables()

    _section("train")
    with tf.Session() as sess:
        sess.run([local_init_op, global_init_op])

        tq = tqdm.trange(2000)
        for it in tq:
            # a fresh synthetic batch every step
            data, labels = create_fake_data()
            if _is_loss_function(model.loss):
                sess.run(train_op, feed_dict={
                    model.x: data,
                    model.y: labels,
                })
                weight_, bias_ = sess.run([model.weight, model.bias])
                tq.set_postfix(weight=weight_, bias=bias_)
            else:
                train_op.update(feed_dict={
                    model.x: data,
                    model.y: labels,
                })
                # note: model.weight / model.bias are the priors, so these
                # postfix values are prior samples, not the fitted posterior
                weight_, bias_ = sess.run([model.weight.value(), model.bias.value()])
                tq.set_postfix(weight=weight_, bias=bias_)

        print("trainable variables:", json.dumps({t.name: sess.run(t).tolist() for t in tvars}, indent=4))

        _section("predict on sample data")
        print("real weight", REAL_WEIGHT)
        print("real bias", REAL_BIAS)
        print("real data", REAL_DATA)
        print("real labels", REAL_LABELS)

        # -- checking out the variable distribution
        if _is_loss_function(model.loss):
            if dropout_keep_prob < 1.0:
                # don't do dropout for the point estimate
                sess.run(model.keep_prob.assign(1.0))
                _section("loss optimization w/ dropout")
            else:
                _section("loss optimization")
            nn_point_estimate, weight_point_estimate, bias_point_estimate = \
                sess.run([model.nn, model.weight, model.bias], feed_dict={
                    model.x: REAL_DATA,
                    model.y: REAL_LABELS,
                })
            print("weight point estimate", weight_point_estimate)
            print("bias point estimate", bias_point_estimate)
            print("nn point estimate", nn_point_estimate)
            print("mean absolute error", np.mean(np.absolute(nn_point_estimate - REAL_LABELS)))

            if dropout_keep_prob < 1.0:
                _section("monte carlo simulations")
                sess.run(model.keep_prob.assign(dropout_keep_prob))  # dropout back on
                nn_mc = []
                for _ in range(MC_SAMPLES):
                    nn_mc.append(sess.run([model.nn], feed_dict={model.x: REAL_DATA, model.y: REAL_LABELS}))
                nn_mc = np.array(nn_mc)
                print("monte carlo nn mean", np.mean(nn_mc, axis=0))
                print("monte carlo nn variance", np.var(nn_mc, axis=0))
                print("+ Gal inverse precision", np.var(nn_mc, axis=0) + _tau_inv(dropout_keep_prob, MC_SAMPLES))
                print("mean absolute error", np.mean(np.absolute(np.mean(nn_mc, axis=0) - REAL_LABELS)))
        else:
            _section("variational inference")
            weight_mean, weight_var = sess.run(tf.nn.moments(model.qweight.sample(MC_SAMPLES), axes=[0]))
            print("weight posterior mean and variance", weight_mean, weight_var)
            bias_mean, bias_var = sess.run(tf.nn.moments(model.qbias.sample(MC_SAMPLES), axes=[0]))
            print("bias posterior mean and variance", bias_mean, bias_var)

            _section("prior predictive checks")
            # ed.copy clones the model.nn subgraph with the prior means
            # swapped in for the latent variables
            prior = ed.copy(model.nn, {
                model.weight: model.weight.mean(), model.bias: model.bias.mean(),
            }, scope="copied/prior")
            nn_prior = []
            for _ in range(MC_SAMPLES):
                nn_prior.append(sess.run(prior.value(), feed_dict={model.x: REAL_DATA, model.y: REAL_LABELS}))
            nn_prior = np.array(nn_prior)
            print("nn prior mean and variance", np.mean(nn_prior, axis=0), np.var(nn_prior, axis=0))
            print("mean absolute error", np.mean(np.absolute(np.mean(nn_prior, axis=0) - REAL_LABELS)))

            _section("posterior predictive checks")
            # same trick, now with the fitted posterior means
            posterior = ed.copy(model.nn, dict_swap={
                model.weight: model.qweight.mean(), model.bias: model.qbias.mean(),
            }, scope="copied/posterior")
            nn_post = sess.run(posterior.sample(MC_SAMPLES), feed_dict={model.x: REAL_DATA})
            print("nn posterior mean and variance", np.mean(nn_post, axis=0), np.var(nn_post, axis=0))
            print("mean absolute error", np.mean(np.absolute(np.mean(nn_post, axis=0) - REAL_LABELS)))
            # TODO: plot?


if __name__ == '__main__':
    """ Try the following:
    >>> python tf_vi_tutorial.py simple_linear
    >>> python tf_vi_tutorial.py simple_linear .9
    >>> python tf_vi_tutorial.py bayesian_simple_linear
    """
    args = sys.argv
    if len(args) == 1:
        main("simple_linear")
    elif len(args) == 2:
        _, model_name = args
        main(model_name)
    elif len(args) == 3:
        _, model_name, dropout_keep_prob = args
        dropout_keep_prob = float(dropout_keep_prob)
        assert 0 < dropout_keep_prob <= 1.0, "keep it real"
        main(model_name, dropout_keep_prob)