Implementing Dropout as a Bayesian Approximation in TensorFlow
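The script below is a small worked example of Monte Carlo dropout in the spirit of Gal and Ghahramani's "Dropout as a Bayesian Approximation": a network trained with dropout plus weight decay can be read as approximate variational inference over its weights. Dropout is therefore kept active at prediction time, and each stochastic forward pass f_t(x) is a sample from the approximate posterior predictive; the average (1/T) * sum_t f_t(x) over T passes estimates the prediction, while the spread of the passes estimates the model's epistemic uncertainty. The layers below drop input units with keep probability model_prob and carry an L2 penalty model_lam * (model_prob * ||M||^2 + ||m||^2), which plays the role of the KL term in the variational objective.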
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.contrib.distributions import Bernoulli  # Requires TensorFlow 1.x.


class VariationalDense:
    """Variational Dense Layer Class"""
    def __init__(self, n_in, n_out, model_prob, model_lam):
        self.model_prob = model_prob
        self.model_lam = model_lam
        self.model_bern = Bernoulli(probs=self.model_prob, dtype=tf.float32)
        self.model_M = tf.Variable(tf.truncated_normal([n_in, n_out], stddev=0.01))
        self.model_m = tf.Variable(tf.zeros([n_out]))
        # Drop input units by left-multiplying the weight matrix with a diagonal
        # Bernoulli mask; a fresh mask is sampled on every session run.
        self.model_W = tf.matmul(
            tf.diag(self.model_bern.sample((n_in, ))), self.model_M
        )

    def __call__(self, X, activation=tf.identity):
        output = activation(tf.matmul(X, self.model_W) + self.model_m)
        # Squeeze the final layer's output so it matches model_y's shape.
        if self.model_M.shape[1] == 1:
            output = tf.squeeze(output)
        return output

    @property
    def regularization(self):
        # Weight decay scaled by the keep probability, as in the
        # dropout-as-Bayesian-approximation derivation.
        return self.model_lam * (
            self.model_prob * tf.reduce_sum(tf.square(self.model_M)) +
            tf.reduce_sum(tf.square(self.model_m))
        )

# Create sample data: noisy observations of cos(5x) / (|x| + 1).
n_samples = 20
X = np.random.normal(size=(n_samples, 1))
y = np.random.normal(np.cos(5.*X) / (np.abs(X) + 1.), 0.1).ravel()
X_pred = np.atleast_2d(np.linspace(-3., 3., num=100)).T
# Expand the inputs with polynomial features.
X = np.hstack((X, X**2, X**3))
X_pred = np.hstack((X_pred, X_pred**2, X_pred**3))
# Create the TensorFlow model.
n_feats = X.shape[1]
n_hidden = 100
model_prob = 0.9  # Dropout keep probability.
model_lam = 1e-2  # Regularization strength.
model_X = tf.placeholder(tf.float32, [None, n_feats])
model_y = tf.placeholder(tf.float32, [None])
model_L_1 = VariationalDense(n_feats, n_hidden, model_prob, model_lam)
model_L_2 = VariationalDense(n_hidden, n_hidden, model_prob, model_lam)
model_L_3 = VariationalDense(n_hidden, 1, model_prob, model_lam)
model_out_1 = model_L_1(model_X, tf.nn.relu)
model_out_2 = model_L_2(model_out_1, tf.nn.relu)
model_pred = model_L_3(model_out_2)
model_sse = tf.reduce_sum(tf.square(model_y - model_pred))
model_mse = model_sse / n_samples
model_loss = (
    # Negative log-likelihood.
    model_sse +
    # Regularization.
    model_L_1.regularization +
    model_L_2.regularization +
    model_L_3.regularization
) / n_samples
train_step = tf.train.AdamOptimizer(1e-3).minimize(model_loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(10000):
        sess.run(train_step, {model_X: X, model_y: y})
        if i % 100 == 0:
            mse = sess.run(model_mse, {model_X: X, model_y: y})
            print("Iteration {}. Mean squared error: {:.4f}.".format(i, mse))

    # Sample from the posterior predictive: dropout stays active, so each
    # forward pass uses a freshly sampled weight mask.
    n_post = 1000
    Y_post = np.zeros((n_post, X_pred.shape[0]))
    for i in range(n_post):
        Y_post[i] = sess.run(model_pred, {model_X: X_pred})
# Plot the posterior predictive samples alongside the training data.
plt.figure(figsize=(8, 6))
for i in range(n_post):
    plt.plot(X_pred[:, 0], Y_post[i], "b-", alpha=1. / 200)
plt.plot(X[:, 0], y, "r.")
plt.grid()
plt.show()
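The individual sample traces already convey the uncertainty, but it is often more convenient to summarize the Monte Carlo samples into a predictive mean and an uncertainty band. A minimal sketch, reusing Y_post, X_pred, X, and y from above; the 2-standard-deviation band is just one common choice and is not part of the original gist:

# Summarize the Monte Carlo dropout samples.
y_mean = Y_post.mean(axis=0)  # Predictive mean over the stochastic passes.
y_std = Y_post.std(axis=0)    # Spread induced by the random dropout masks.
plt.figure(figsize=(8, 6))
plt.plot(X_pred[:, 0], y_mean, "b-", label="predictive mean")
plt.fill_between(
    X_pred[:, 0], y_mean - 2. * y_std, y_mean + 2. * y_std,
    color="b", alpha=0.2, label="+/- 2 std. dev."
)
plt.plot(X[:, 0], y, "r.", label="training data")
plt.legend()
plt.grid()
plt.show()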