Julia VAE
#=
Recreates the variational autoencoder visualization experiment from the paper
"Auto-Encoding Variational Bayes" by Kingma and Welling. The paper is available
at https://arxiv.org/abs/1312.6114.
=#
import TensorFlow
const tf = TensorFlow
include("mnist_loader.jl")

# Configuration for experiment.
MNIST_SIZE = 784   # Number of pixels in MNIST images.
BATCH_SIZE = 100   # Number of images in a minibatch.
HIDDEN_SIZE = 100  # Hidden layer size.
LATENT_SIZE = 2    # Size (dimension) of latent representation.
""" | |
Add a fully-connected linear layer to the graph. | |
Given an input tensor X of shape [BATCH, INPUT_SIZE], create a linear layer | |
with weights (shape [INPUT_SIZE, OUTPUT_SIZE]) and biases (shape [OUTPUT_SIZE]), | |
and return a new tensor Y = X * W + b (shape [BATCH, OUTPUT_SIZE]). Weights are | |
initialized with standard normals, and biases are initialized to zero. | |
Args: | |
input_tensor: 2-D tensor with shape [BATCH, input_size]. | |
input_size: Length of second dimension of input_tensor. | |
output_size: Length of second dimension of output_tensor. | |
Returns: | |
output_tensor: 2-D tensor with shape [BATCH, output_size], the output of | |
the linear layer. | |
""" | |
function add_linear_layer(input_tensor, input_size, output_size) | |
# TODO: Use shape inference to remove need for input_size. | |
weights = tf.Variable(randn(Float32, input_size, output_size)) | |
biases = tf.Variable(zeros(Float32, output_size)) | |
return input_tensor * weights + biases | |
end | |
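
# Example (sketch): the first encoder layer built below maps a batch of MNIST
# images to the hidden representation, i.e.
#   add_linear_layer(input_ph, MNIST_SIZE, HIDDEN_SIZE)  # [100, 784] -> [100, 100]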
""" | |
Add a Gaussian distribution parameter encoder to the graph. | |
Args: | |
input_tensor: 2-D tensor with shape [BATCH, INPUT_SIZE]. | |
input_size: Size of second dimension of input_tensor. | |
hidden_size: Size of hidden layer. | |
latent_size: Size of latent space. | |
Retuns: | |
mu: Means for latent space Gaussian. | |
logsigma2: Log variance for latent space Gaussian. | |
""" | |
function create_encoder(input_tensor, input_size, hidden_size, latent_size) | |
# Create hidden layer (linear with tanh activation). | |
hidden = tf.nn.tanh(add_linear_layer(input_tensor, input_size, hidden_size)) | |
# Gaussian parameters are linear transformations of hidden layer. | |
mu = add_linear_layer(hidden, hidden_size, latent_size) | |
logsigma2 = add_linear_layer(hidden, hidden_size, latent_size) | |
return mu, logsigma2 | |
end | |
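
# The encoder parameterizes the approximate posterior q(z | x) as a diagonal
# Gaussian N(mu, diag(exp(logsigma2))), as in Kingma and Welling. Predicting
# the log-variance rather than sigma itself keeps the variance positive
# without constraining the linear output layer.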
""" | |
Add a Bernoulli distribution parameter decoder to the graph. | |
Args: | |
latent_tensor: 2-D tensor with shape [BATCH, latent_size]. | |
latent_size: Size of second dimension of latent_tensor. | |
hidden_size: Size of hidden layer. | |
output_size: Size of second dimension of output_probs. | |
Returns: | |
output_probs: 2-D of tensor of probabilities, shape [BATCH, output_size]. | |
""" | |
function create_decoder(latent_tensor, latent_size, hidden_size, output_size) | |
# Create hidden layer (linear with tanh activation). | |
hidden = tf.nn.tanh(add_linear_layer(latent_tensor, latent_size, | |
hidden_size)) | |
# Probabilities are produced from a sigmoid activation to ensure [0,1]. | |
output_probs = tf.nn.sigmoid(add_linear_layer(hidden, hidden_size, | |
output_size)) | |
return output_probs | |
end | |
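
# The decoder parameterizes p(x | z) as a product of independent Bernoullis,
# one per pixel, matching the paper's Bernoulli MLP decoder for MNIST pixels
# treated as (approximately) binary.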
function vae_experiment()
    # Prepare MNIST images.
    loader = DataLoader()
    # Create an input placeholder.
    input_ph = tf.placeholder(Float32, shape=(BATCH_SIZE, MNIST_SIZE))
    # Construct an encoder that produces Gaussian distribution parameters from
    # the MNIST images.
    mu, logsigma2 = create_encoder(input_ph, MNIST_SIZE, HIDDEN_SIZE,
                                   LATENT_SIZE)
    # TODO: No random distribution ops currently, so we need to manually feed
    # the draws from a multivariate normal in through a placeholder.
    normal_ph = tf.placeholder(Float32, shape=(BATCH_SIZE, LATENT_SIZE))
    # Sample from the latent distribution.
    sigma = exp(0.5 * logsigma2)
    sample = mu + sigma .* normal_ph
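    # This is the "reparameterization trick" from the paper: writing the
    # sample as mu + sigma .* eps, with eps ~ N(0, I) fed in from outside,
    # moves the randomness into an input so gradients can flow through mu
    # and sigma back to the encoder weights.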
    # Decode samples to Bernoulli distribution parameters (probabilities).
    output_probs = create_decoder(sample, LATENT_SIZE, HIDDEN_SIZE, MNIST_SIZE)
    # Calculate the 'loss' function for the VAE. It comprises two parts, and
    # unlike, say, the L2 reconstruction loss of a plain autoencoder, we will
    # be trying to maximize it: the "evidence lower bound" (ELBO).
    # First part: the KL divergence from the prior (computed with its sign
    # flipped, since we add it to the ELBO). This acts as a regularizer.
    kl_divergence = 0.5 * tf.reduce_sum(1 + logsigma2 - mu.^2 - exp(logsigma2),
                                        reduction_indices=[2])
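    # For a diagonal Gaussian posterior q = N(mu, diag(sigma^2)) and a
    # standard normal prior p, the KL term has the closed form
    #   -D_KL(q || p) = 0.5 * sum_j (1 + log(sigma_j^2) - mu_j^2 - sigma_j^2),
    # which is exactly the expression summed above (Kingma & Welling, App. B).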
    # Second part: reconstruction loss.
    recon_loss = tf.reduce_sum(input_ph .* log(output_probs) +
                               (1 - input_ph) .* log(1 - output_probs),
                               reduction_indices=[2])
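    # This is the Bernoulli log-likelihood of the input pixels under the
    # decoder probabilities (binary cross-entropy with its sign flipped).
    # Note: if output_probs saturates at exactly 0 or 1 the log terms diverge;
    # clamping the probabilities away from the endpoints by a small epsilon
    # would guard against that (left out here to keep the sketch minimal).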
    objective = tf.reduce_mean(kl_divergence + recon_loss)
    # Create optimizer. The optimizer minimizes, so pass the negated ELBO.
    train_step = tf.train.minimize(tf.train.GradientDescentOptimizer(0.01),
                                   -objective)
    # Initialize the weights and biases.
    sess = tf.Session(tf.Graph())
    println("Initializing variables...")
    tf.run(sess, tf.initialize_all_variables())
    # Optimize the parameters of the VAE.
    for iteration in 1:10
        # Sample the next batch of MNIST images.
        batch = next_batch(loader, BATCH_SIZE)
        # Sample from a multivariate normal distribution.
        normal_sample = randn(BATCH_SIZE, LATENT_SIZE)
        # TODO: Evaluate the objective and take the training step in a single
        # call, e.g.
        #   cur_objective, _ = tf.run(sess, [objective, train_step],
        #                             Dict(input_ph=>batch[1],
        #                                  normal_ph=>normal_sample))
        # For now, run them separately.
        cur_objective = tf.run(sess, objective,
                               Dict(input_ph=>batch[1],
                                    normal_ph=>normal_sample))
        tf.run(sess, train_step,
               Dict(input_ph=>batch[1],
                    normal_ph=>normal_sample))
        @printf("Iter. %4d, objective: %7.2f\n", iteration, cur_objective)
    end
end

vae_experiment()
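
Note: this sketch assumes TensorFlow.jl plus a local mnist_loader.jl providing
DataLoader and next_batch (as in the TensorFlow.jl examples); on modern Julia
(0.7+) you would also need `using Printf` for `@printf`. With LATENT_SIZE = 2,
the trained decoder can be fed a regular grid of latent values to recreate the
learned-manifold visualization from the paper.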