Julia VAE
#=
Recreates the variational autoencoder visualization experiment from the paper
"Auto-Encoding Variational Bayes" by Kingma and Welling. The paper is available
at https://arxiv.org/abs/1312.6114.
=#
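# The training objective is the per-datapoint evidence lower bound (ELBO)
# from the paper,
#     L(theta, phi; x) = -KL(q_phi(z|x) || p(z)) + E_{q_phi(z|x)}[log p_theta(x|z)],
# estimated below with a single reparameterized sample z = mu + sigma .* eps,
# where eps ~ N(0, I).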
import TensorFlow
const tf = TensorFlow
include("mnist_loader.jl")
# Configuration for experiment.
MNIST_SIZE = 784 # Number of pixels in MNIST images.
BATCH_SIZE = 100 # Number of images in a minibatch.
HIDDEN_SIZE = 100 # Hidden layer size.
LATENT_SIZE = 2 # Size (dimension) of latent representation.
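# Note: a 2-D latent space is what lets the paper's visualization experiment
# plot the learned manifold directly.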
"""
Add a fully-connected linear layer to the graph.
Given an input tensor X of shape [BATCH, INPUT_SIZE], create a linear layer
with weights (shape [INPUT_SIZE, OUTPUT_SIZE]) and biases (shape [OUTPUT_SIZE]),
and return a new tensor Y = X * W + b (shape [BATCH, OUTPUT_SIZE]). Weights are
initialized with standard normals, and biases are initialized to zero.
Args:
input_tensor: 2-D tensor with shape [BATCH, input_size].
input_size: Length of second dimension of input_tensor.
output_size: Length of second dimension of output_tensor.
Returns:
output_tensor: 2-D tensor with shape [BATCH, output_size], the output of
the linear layer.
"""
function add_linear_layer(input_tensor, input_size, output_size)
    # TODO: Use shape inference to remove need for input_size.
    weights = tf.Variable(randn(Float32, input_size, output_size))
    biases = tf.Variable(zeros(Float32, output_size))
    return input_tensor * weights + biases
end
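# Example (shapes only; `x` here is a hypothetical [BATCH_SIZE, MNIST_SIZE]
# tensor, not a name used elsewhere in this script):
#   h = add_linear_layer(x, MNIST_SIZE, HIDDEN_SIZE)  # [BATCH_SIZE, HIDDEN_SIZE]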
"""
Add a Gaussian distribution parameter encoder to the graph.
Args:
input_tensor: 2-D tensor with shape [BATCH, INPUT_SIZE].
input_size: Size of second dimension of input_tensor.
hidden_size: Size of hidden layer.
latent_size: Size of latent space.
Retuns:
mu: Means for latent space Gaussian.
logsigma2: Log variance for latent space Gaussian.
"""
function create_encoder(input_tensor, input_size, hidden_size, latent_size)
    # Create hidden layer (linear with tanh activation).
    hidden = tf.nn.tanh(add_linear_layer(input_tensor, input_size, hidden_size))
    # Gaussian parameters are linear transformations of hidden layer.
    mu = add_linear_layer(hidden, hidden_size, latent_size)
    logsigma2 = add_linear_layer(hidden, hidden_size, latent_size)
    return mu, logsigma2
end
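# Note: the encoder predicts log sigma^2 rather than sigma directly, so the
# linear output is unconstrained; exp() later recovers a positive variance.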
"""
Add a Bernoulli distribution parameter decoder to the graph.
Args:
latent_tensor: 2-D tensor with shape [BATCH, latent_size].
latent_size: Size of second dimension of latent_tensor.
hidden_size: Size of hidden layer.
output_size: Size of second dimension of output_probs.
Returns:
output_probs: 2-D of tensor of probabilities, shape [BATCH, output_size].
"""
function create_decoder(latent_tensor, latent_size, hidden_size, output_size)
    # Create hidden layer (linear with tanh activation).
    hidden = tf.nn.tanh(add_linear_layer(latent_tensor, latent_size,
                                         hidden_size))
    # Probabilities are produced from a sigmoid activation to ensure [0,1].
    output_probs = tf.nn.sigmoid(add_linear_layer(hidden, hidden_size,
                                                  output_size))
    return output_probs
end
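# Note: this parameterizes p(x|z) as independent Bernoullis, one per pixel,
# so output_probs holds the per-pixel means (the paper's "Bernoulli MLP").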
function vae_experiment()
    # Prepare MNIST images.
    loader = DataLoader()
    # Create an input placeholder.
    input_ph = tf.placeholder(Float32, shape=(BATCH_SIZE, MNIST_SIZE))
    # Construct an encoder that produces Gaussian distribution parameters from
    # the MNIST images.
    mu, logsigma2 = create_encoder(input_ph, MNIST_SIZE, HIDDEN_SIZE,
                                   LATENT_SIZE)
    # TODO: No random distribution ops currently, so we need to manually feed
    # draws from a multivariate normal through a placeholder.
    normal_ph = tf.placeholder(Float32, shape=(BATCH_SIZE, LATENT_SIZE))
    # Sample from latent distribution.
    sigma = exp(0.5 * logsigma2)
    sample = mu + sigma .* normal_ph
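    # This is the reparameterization trick: z = mu + sigma .* eps with
    # eps ~ N(0, I) gives a differentiable path from the objective back to
    # mu and logsigma2, so gradients can flow through the sampling step.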
    # Decode samples to Bernoulli distribution parameters (probabilities).
    output_probs = create_decoder(sample, LATENT_SIZE, HIDDEN_SIZE, MNIST_SIZE)
    # Calculate the objective for the VAE. It has two parts, and unlike, say,
    # the L2 reconstruction loss of an ordinary autoencoder, we will be trying
    # to maximize it: it is the "evidence lower bound" (ELBO).
    # First part: the negative KL divergence from the prior, which acts as a
    # regularizer. It has a closed form for a diagonal Gaussian posterior and
    # standard normal prior (Appendix B of the paper).
    neg_kl_divergence = 0.5 * tf.reduce_sum(1 + logsigma2 - mu.^2 - exp(logsigma2),
                                            reduction_indices=[2])
    # Second part: the expected reconstruction log-likelihood, i.e. the
    # Bernoulli log-probability of the observed pixels.
    recon_loglik = tf.reduce_sum(input_ph .* log(output_probs) +
                                 (1 - input_ph) .* log(1 - output_probs),
                                 reduction_indices=[2])
    objective = tf.reduce_mean(neg_kl_divergence + recon_loglik)
    # Create optimizer. Gradient descent on -objective maximizes the ELBO.
    train_step = tf.train.minimize(tf.train.GradientDescentOptimizer(0.01),
                                   -objective)
    # Initialize the weights and biases. The session uses the default graph,
    # which is where all the ops above were created.
    sess = tf.Session()
    println("Initializing variables...")
    tf.run(sess, tf.initialize_all_variables())
    # Optimize the parameters of the VAE.
    for iteration in 1:10
        # Sample next batch of MNIST images.
        batch = next_batch(loader, BATCH_SIZE)
        # Sample from a multivariate normal distribution.
        normal_sample = randn(BATCH_SIZE, LATENT_SIZE)
        # TODO: Fetch the objective and run the training step in a single
        # tf.run call once fetching an op alongside a tensor works.
        cur_objective = tf.run(sess, objective,
                               Dict(input_ph=>batch[1],
                                    normal_ph=>normal_sample))
        tf.run(sess, train_step,
               Dict(input_ph=>batch[1],
                    normal_ph=>normal_sample))
        @printf("Iter. %4d, objective: %7.2f\n", iteration, cur_objective)
    end
end
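# Quick numeric sanity check (plain Julia, no TensorFlow; not part of the
# original experiment) that the closed-form term above really is the negative
# of KL(N(mu, sigma^2) || N(0, 1)) = 0.5 * (mu^2 + sigma^2 - log(sigma^2) - 1),
# checked at one arbitrary point:
let mu = 0.3, logsigma2 = -0.2
    neg_kl = 0.5 * (1 + logsigma2 - mu^2 - exp(logsigma2))
    kl = 0.5 * (mu^2 + exp(logsigma2) - logsigma2 - 1)
    @assert isapprox(neg_kl, -kl)
end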
vae_experiment()