jtrecenti/gan-mnist-captcha.R

## gan-mnist-captcha.R
#' Author: Julio Trecenti
#' Subject: Mnist-Captcha AC-GAN, based on keras example
#' https://tensorflow.rstudio.com/keras/articles/examples/mnist_acgan.html

# library(tidyverse)
library(magrittr)
library(keras)
k_set_image_data_format('channels_first')

# Data preparation -------------------------------------------------------
mnist <- dataset_mnist()
one_hot <- function(x) {
  labs_em_fator <- factor(x, levels = 0:9)
  m <- model.matrix(~labs_em_fator - 1)
  attributes(m) <- list(dim = dim(m))
  keras::array_reshape(m, c(dim(m)[1], 1, dim(m)[2]))
}
juntar_xs <- function(x) {
  rd <- seq_len(nrow(x)) %% 4
  a <- abind::abind(
    x[rd==3,,], x[rd==2,,], x[rd==1,,], x[rd==0,,],
    along = 3
  )
  attributes(a) <- list(dim = dim(a))
  a
}
juntar_ys <- function(y) {
  rd <- seq_along(y) %% 4
  a <- abind::abind(
    one_hot(y[rd==3]), one_hot(y[rd==2]), one_hot(y[rd==1]), one_hot(y[rd==0]),
    along = 2
  )
  attributes(a) <- list(dim = dim(a))
  a
}
mnist$train$x <- juntar_xs(mnist$train$x)
mnist$train$y <- juntar_ys(mnist$train$y)
mnist$test$x <- juntar_xs(mnist$test$x)
mnist$test$y <- juntar_ys(mnist$test$y)

num_train <- dim(mnist$train$x)[1]
num_test <- dim(mnist$test$x)[1]

# from -1 to 1
mnist$train$x <- (mnist$train$x - 127.5)/127.5
mnist$test$x <- (mnist$test$x - 127.5)/127.5
mnist$train$x <- array_reshape(mnist$train$x, c(15000, 1, 28, 28*4))
mnist$test$x <- array_reshape(mnist$test$x, c(2500, 1, 28, 28*4))


# Functions ---------------------------------------------------------------

build_generator <- function(){

  # We will map a pair of (z, L), where z is a latent vector and L is a
  # label drawn from P_c, to image space (..., 1, 28, 28)
  # cnn <- keras_model_sequential()


  # This is the z space commonly referred to in GAN papers
  latent <- layer_input(c(4, 10), name = "noise")
  # This will be our label
  image_class <- layer_input(c(4L, 10L), name = "sampled_labels")

  input <- layer_add(list(latent, image_class))

  # output <- input %>%
  #
  #   layer_flatten() %>%
  #   layer_dense(32, activation = "tanh") %>%
  #   layer_dense(32 * 7 * 7, activation = "tanh") %>%
  #   layer_reshape(c(32, 7, 7)) %>%
  #
  #   layer_upsampling_2d(size = c(2,2)) %>%
  #   layer_conv_2d(16, 3, padding = "same") %>%
  #   layer_activation_leaky_relu() %>%
  #
  #   layer_upsampling_2d(size = c(2,2)) %>%
  #   layer_conv_2d(8, 3, padding = "same") %>%
  #   layer_activation_leaky_relu() %>%
  #
  #   layer_upsampling_2d(size = c(1,2)) %>%
  #   layer_conv_2d(4, 3, padding = "same") %>%
  #   layer_activation_leaky_relu() %>%
  #
  #   layer_conv_2d(1, 3, padding = "same", activation = "tanh")

  output <- input %>%
    layer_flatten() %>%

    layer_dense(4 * 7 * 14, activation = "tanh") %>%
    layer_reshape(c(4, 7, 14)) %>%

    layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>%
    layer_activation_leaky_relu() %>%

    layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>%
    layer_activation_leaky_relu() %>%

    layer_conv_2d_transpose(32, 3, padding = "same", strides = c(1, 2)) %>%
    layer_activation_leaky_relu() %>%

    # Take a channel axis reduction
    layer_conv_2d( 1, 3, padding = "same", activation = "tanh")

  # 36 classes in mnist
  # cls <- image_class %>%
  #   layer_embedding(
  #     input_dim = 2 * 10,
  #     output_dim = latent_size,
  #     embeddings_initializer = 'glorot_normal'
  #   )

  # Hadamard product between z-space and a class conditional embedding]
  # latent_reshape <- latent %>%
  #   layer_reshape(c(2 * 10, latent_size))
  # h <- layer_multiply(list(latent, cls))


  keras_model(list(latent, image_class), output)
}

build_discriminator <- function() {

  # Build a relatively standard conv net, with LeakyReLUs as suggested in
  # the reference paper

  image <- layer_input(shape = c(1, 28, 28*4))
  output <- image %>%
    layer_conv_2d(32, 5, padding = "same") %>%
    layer_activation_leaky_relu() %>%
    layer_max_pooling_2d() %>%
    layer_conv_2d(64, 5, padding = "same") %>%
    layer_activation_leaky_relu() %>%
    layer_max_pooling_2d() %>%
    layer_conv_2d(64, 3, padding = "same") %>%
    layer_activation_leaky_relu() %>%
    layer_flatten() %>%
    layer_dense(64, activation = "relu") %>%
    layer_dropout(0.2) %>%
    layer_dense(128, activation = "relu")

  cnn <- keras_model(image, output)
  features <- cnn(image)

  # First output (name=generation) is whether or not the discriminator
  # thinks the image that is being shown is fake, and the second output
  # (name=auxiliary) is the class that the discriminator thinks the image
  # belongs to.
  fake <- features %>%
    layer_dense(32, activation = "tanh") %>%
    layer_dense(1, activation = "sigmoid", name = "generation")

  aux <- features %>%
    layer_dense(4 * 10, activation = "relu") %>%
    layer_reshape(c(4, 10)) %>%
    layer_activation("softmax", name = "auxiliary")

  keras_model(image, list(fake, aux))
}

# Parameters --------------------------------------------------------------

# Batch and latent size taken from the paper
epochs <- 50
batch_size <- 100
adam_lr <- 0.0008

# Model Definition --------------------------------------------------------

# Build the discriminator
discriminator <- build_discriminator()
discriminator %>% compile(
  optimizer = optimizer_adam(lr = adam_lr),
  loss = list("binary_crossentropy", "categorical_crossentropy"),
  metrics = list("accuracy", "accuracy")
)

# Build the generator
generator <- build_generator()
generator %>% compile(
  optimizer = optimizer_adam(lr = adam_lr),
  loss = "binary_crossentropy"
)

latent <- layer_input(shape = list(4L, 10L))
image_class <- layer_input(shape = list(4L, 10L))
fake <- generator(list(latent, image_class))

# Only want to be able to train generation for the combined model
freeze_weights(discriminator)
results <- discriminator(fake)

combined <- keras_model(list(latent, image_class), results)
combined %>% compile(
  optimizer = optimizer_adam(lr = adam_lr),
  loss = list("binary_crossentropy", "categorical_crossentropy")
)

combined


# Helper Functions -----------------------------------------------------------

transform_to_matrix <- function(x) {
  labs_em_fator <- factor(x, levels = 0:9)
  m <- model.matrix(~labs_em_fator - 1)
  attributes(m) <- list(dim = c(4, 10))
  m
}

generate_image <- function(num = sample(0:9, 4)) {
  # Noise
  noise <- rnorm(4 * 10, 0, .01) %>%
    array(dim = c(4, 10)) %>%
    array_reshape(c(1, dim(.)))

  # Digits
  sampled_labels <- num %>%
    matrix(ncol = 4) %>%
    transform_to_matrix() %>%
    array_reshape(c(1, dim(.)))

  resp <- paste(c(0:9)[apply(sampled_labels[1,,], 1, which.max)],
                collapse = "")
  print(resp)

  # Apply generator
  generated_images <- predict(
    generator,
    list(noise, sampled_labels)
  )

  img <- generated_images[1,1,,]
  plot(as.raster((img+1)/2))
}

generate_image()

report <- function(generator_train_loss, generator_test_loss,
                   discriminator_train_loss, discriminator_test_loss) {
  # Generate an epoch report on performance
  row_fmt <- "\n%22s : loss %4.2f | %5.2f | %5.2f"
  row_fmt2 <- "\n%22s : loss %4.2f | %5.2f | %5.2f | %5.2f | %5.2f"
  cat(sprintf(
    row_fmt,
    "generator (train)",
    generator_train_loss[1],
    generator_train_loss[2],
    generator_train_loss[3]
  ))
  cat(sprintf(
    row_fmt,
    "generator (test)",
    generator_test_loss[1],
    generator_test_loss[2],
    generator_test_loss[3]
  ))

  cat(sprintf(
    row_fmt2,
    "discriminator (train)",
    discriminator_train_loss[1],
    discriminator_train_loss[2],
    discriminator_train_loss[3],
    discriminator_train_loss[4],
    discriminator_train_loss[6]
  ))

  cat(sprintf(
    row_fmt2,
    "discriminator (test)",
    discriminator_test_loss[1],
    discriminator_test_loss[2],
    discriminator_test_loss[3],
    discriminator_test_loss[4],
    discriminator_test_loss[6]
  ))

  cat("\n")

}

# Training ----------------------------------------------------------------

for(epoch in 1:epochs) {

  num_batches <- trunc(num_train / batch_size)
  pb <- progress::progress_bar$new(
    total = num_batches,
    format = sprintf("epoch %s/%s :elapsed [:bar] :percent :eta", epoch, epochs),
    clear = FALSE
  )

  epoch_gen_loss <- NULL
  epoch_disc_loss <- NULL
  possible_indexes <- 1:num_train

  for(index in 1:num_batches) {

    pb$tick()

    # Generate a new batch of noise
    noise <- rnorm(4 * 10 * batch_size, 0, 0.1) %>%
      array(dim = c(batch_size, 4, 10))

    # Get a batch of real images
    batch <- sample(possible_indexes, size = batch_size)
    possible_indexes <- possible_indexes[!possible_indexes %in% batch]
    image_batch <- mnist$train$x[batch,,,,drop = FALSE]
    label_batch <- mnist$train$y[batch,,,drop = FALSE]

    # Sample some labels from p_c
    sampled_labels <- sample(0:9, batch_size * 4, replace = TRUE) %>%
      matrix(ncol = 4) %>%
      plyr::aaply(1, transform_to_matrix)

    # Generate a batch of fake images, using the generated labels as a
    # conditioner. We reshape the sampled labels to be
    # (batch_size, 1) so that we can feed them into the embedding
    # layer as a length one sequence
    generated_images <- predict(generator, list(noise, sampled_labels))

    X <- abind::abind(image_batch, generated_images, along = 1)
    y <- matrix(c(rep(1L, batch_size), rep(0L, batch_size)), ncol = 1)
    aux_y <- abind::abind(label_batch, sampled_labels, along = 1)

    # Check if the discriminator can figure itself out
    disc_loss <- train_on_batch(discriminator, x = X, y = list(y, aux_y))
    epoch_disc_loss <- rbind(epoch_disc_loss, unlist(disc_loss))

    # Make new noise. Generate 2 * batch size here such that
    # the generator optimizes over an identical number of images as the
    # discriminator
    noise <- rnorm(2 * 4 * 10 * batch_size, 0, 0.1) %>%
      array(dim = c(2 * batch_size, 4, 10))
    sampled_labels <- sample(0:9, 2 * batch_size * 4, replace = TRUE) %>%
      matrix(ncol = 4) %>%
      plyr::aaply(1, transform_to_matrix)

    # Want to train the generator to trick the discriminator
    # For the generator, we want all the {fake, not-fake} labels to say
    # not-fake
    trick <- matrix(rep(1, 2 * batch_size), ncol = 1)

    combined_loss <- train_on_batch(
      object = combined,
      x = list(noise, sampled_labels),
      y = list(trick, sampled_labels)
    )

    epoch_gen_loss <- rbind(epoch_gen_loss, unlist(combined_loss))

  }

  cat(sprintf("\nTesting for epoch %02d:", epoch))

  # Evaluate the testing loss here
  # Generate a new batch of noise
  noise <- rnorm(4 * 10 * num_test, 0, 0.1) %>%
    array(dim = c(num_test, 4, 10))

  # Sample some labels from p_c and generate images from them
  sampled_labels <- sample(0:9, num_test * 4, replace = TRUE) %>%
    matrix(ncol = 4) %>%
    plyr::aaply(1, transform_to_matrix)

  generated_images <- predict(generator, list(noise, sampled_labels))

  X <- abind::abind(mnist$test$x, generated_images, along = 1)
  y <- matrix(c(rep(1, num_test), rep(0, num_test)), ncol = 1)
  aux_y <- abind::abind(mnist$test$y, sampled_labels, along = 1)

  # See if the discriminator can figure itself out...
  discriminator_test_loss <- evaluate(
    discriminator, X, list(y, aux_y),
    verbose = FALSE
  ) %>% unlist()

  discriminator_train_loss <- apply(epoch_disc_loss, 2, mean)

  # Make new noise
  noise <- rnorm(2 * 4 * 10 * num_test, 0, 0.1) %>%
    array(dim = c(2 * num_test, 4, 10))
  sampled_labels <- sample(0:9, 2 * num_test * 4, replace = TRUE) %>%
    matrix(ncol = 4) %>%
    plyr::aaply(1, transform_to_matrix)

  trick <- matrix(rep(1, 2 * num_test), ncol = 1)

  generator_test_loss = combined %>%
    evaluate(
      list(noise, sampled_labels),
      list(trick, sampled_labels),
      verbose = FALSE
    )

  generator_train_loss <- apply(epoch_gen_loss, 2, mean)

  report(
    generator_train_loss,
    generator_test_loss,
    discriminator_train_loss,
    discriminator_test_loss
  )

  generate_image()

}
	#' Author: Julio Trecenti
	#' Subject: Mnist-Captcha AC-GAN, based on keras example
	#' https://tensorflow.rstudio.com/keras/articles/examples/mnist_acgan.html

	# library(tidyverse)
	library(magrittr)
	library(keras)
	k_set_image_data_format('channels_first')

	# Data preparation -------------------------------------------------------
	mnist <- dataset_mnist()
	one_hot <- function(x) {
	labs_em_fator <- factor(x, levels = 0:9)
	m <- model.matrix(~labs_em_fator - 1)
	attributes(m) <- list(dim = dim(m))
	keras::array_reshape(m, c(dim(m)[1], 1, dim(m)[2]))
	}
	juntar_xs <- function(x) {
	rd <- seq_len(nrow(x)) %% 4
	a <- abind::abind(
	x[rd==3,,], x[rd==2,,], x[rd==1,,], x[rd==0,,],
	along = 3
	)
	attributes(a) <- list(dim = dim(a))
	a
	}
	juntar_ys <- function(y) {
	rd <- seq_along(y) %% 4
	a <- abind::abind(
	one_hot(y[rd==3]), one_hot(y[rd==2]), one_hot(y[rd==1]), one_hot(y[rd==0]),
	along = 2
	)
	attributes(a) <- list(dim = dim(a))
	a
	}
	mnist$train$x <- juntar_xs(mnist$train$x)
	mnist$train$y <- juntar_ys(mnist$train$y)
	mnist$test$x <- juntar_xs(mnist$test$x)
	mnist$test$y <- juntar_ys(mnist$test$y)

	num_train <- dim(mnist$train$x)[1]
	num_test <- dim(mnist$test$x)[1]

	# from -1 to 1
	mnist$train$x <- (mnist$train$x - 127.5)/127.5
	mnist$test$x <- (mnist$test$x - 127.5)/127.5
	mnist$train$x <- array_reshape(mnist$train$x, c(15000, 1, 28, 28*4))
	mnist$test$x <- array_reshape(mnist$test$x, c(2500, 1, 28, 28*4))


	# Functions ---------------------------------------------------------------

	build_generator <- function(){

	# We will map a pair of (z, L), where z is a latent vector and L is a
	# label drawn from P_c, to image space (..., 1, 28, 28)
	# cnn <- keras_model_sequential()


	# This is the z space commonly referred to in GAN papers
	latent <- layer_input(c(4, 10), name = "noise")
	# This will be our label
	image_class <- layer_input(c(4L, 10L), name = "sampled_labels")

	input <- layer_add(list(latent, image_class))

	# output <- input %>%
	#
	# layer_flatten() %>%
	# layer_dense(32, activation = "tanh") %>%
	# layer_dense(32 * 7 * 7, activation = "tanh") %>%
	# layer_reshape(c(32, 7, 7)) %>%
	#
	# layer_upsampling_2d(size = c(2,2)) %>%
	# layer_conv_2d(16, 3, padding = "same") %>%
	# layer_activation_leaky_relu() %>%
	#
	# layer_upsampling_2d(size = c(2,2)) %>%
	# layer_conv_2d(8, 3, padding = "same") %>%
	# layer_activation_leaky_relu() %>%
	#
	# layer_upsampling_2d(size = c(1,2)) %>%
	# layer_conv_2d(4, 3, padding = "same") %>%
	# layer_activation_leaky_relu() %>%
	#
	# layer_conv_2d(1, 3, padding = "same", activation = "tanh")

	output <- input %>%
	layer_flatten() %>%

	layer_dense(4 * 7 * 14, activation = "tanh") %>%
	layer_reshape(c(4, 7, 14)) %>%

	layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>%
	layer_activation_leaky_relu() %>%

	layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>%
	layer_activation_leaky_relu() %>%

	layer_conv_2d_transpose(32, 3, padding = "same", strides = c(1, 2)) %>%
	layer_activation_leaky_relu() %>%

	# Take a channel axis reduction
	layer_conv_2d( 1, 3, padding = "same", activation = "tanh")

	# 36 classes in mnist
	# cls <- image_class %>%
	# layer_embedding(
	# input_dim = 2 * 10,
	# output_dim = latent_size,
	# embeddings_initializer = 'glorot_normal'
	# )

	# Hadamard product between z-space and a class conditional embedding]
	# latent_reshape <- latent %>%
	# layer_reshape(c(2 * 10, latent_size))
	# h <- layer_multiply(list(latent, cls))


	keras_model(list(latent, image_class), output)
	}

	build_discriminator <- function() {

	# Build a relatively standard conv net, with LeakyReLUs as suggested in
	# the reference paper

	image <- layer_input(shape = c(1, 28, 28*4))
	output <- image %>%
	layer_conv_2d(32, 5, padding = "same") %>%
	layer_activation_leaky_relu() %>%
	layer_max_pooling_2d() %>%
	layer_conv_2d(64, 5, padding = "same") %>%
	layer_activation_leaky_relu() %>%
	layer_max_pooling_2d() %>%
	layer_conv_2d(64, 3, padding = "same") %>%
	layer_activation_leaky_relu() %>%
	layer_flatten() %>%
	layer_dense(64, activation = "relu") %>%
	layer_dropout(0.2) %>%
	layer_dense(128, activation = "relu")

	cnn <- keras_model(image, output)
	features <- cnn(image)

	# First output (name=generation) is whether or not the discriminator
	# thinks the image that is being shown is fake, and the second output
	# (name=auxiliary) is the class that the discriminator thinks the image
	# belongs to.
	fake <- features %>%
	layer_dense(32, activation = "tanh") %>%
	layer_dense(1, activation = "sigmoid", name = "generation")

	aux <- features %>%
	layer_dense(4 * 10, activation = "relu") %>%
	layer_reshape(c(4, 10)) %>%
	layer_activation("softmax", name = "auxiliary")

	keras_model(image, list(fake, aux))
	}

	# Parameters --------------------------------------------------------------

	# Batch and latent size taken from the paper
	epochs <- 50
	batch_size <- 100
	adam_lr <- 0.0008

	# Model Definition --------------------------------------------------------

	# Build the discriminator
	discriminator <- build_discriminator()
	discriminator %>% compile(
	optimizer = optimizer_adam(lr = adam_lr),
	loss = list("binary_crossentropy", "categorical_crossentropy"),
	metrics = list("accuracy", "accuracy")
	)

	# Build the generator
	generator <- build_generator()
	generator %>% compile(
	optimizer = optimizer_adam(lr = adam_lr),
	loss = "binary_crossentropy"
	)

	latent <- layer_input(shape = list(4L, 10L))
	image_class <- layer_input(shape = list(4L, 10L))
	fake <- generator(list(latent, image_class))

	# Only want to be able to train generation for the combined model
	freeze_weights(discriminator)
	results <- discriminator(fake)

	combined <- keras_model(list(latent, image_class), results)
	combined %>% compile(
	optimizer = optimizer_adam(lr = adam_lr),
	loss = list("binary_crossentropy", "categorical_crossentropy")
	)

	combined



	# Helper Functions -----------------------------------------------------------

	transform_to_matrix <- function(x) {
	labs_em_fator <- factor(x, levels = 0:9)
	m <- model.matrix(~labs_em_fator - 1)
	attributes(m) <- list(dim = c(4, 10))
	m
	}

	generate_image <- function(num = sample(0:9, 4)) {
	# Noise
	noise <- rnorm(4 * 10, 0, .01) %>%
	array(dim = c(4, 10)) %>%
	array_reshape(c(1, dim(.)))

	# Digits
	sampled_labels <- num %>%
	matrix(ncol = 4) %>%
	transform_to_matrix() %>%
	array_reshape(c(1, dim(.)))

	resp <- paste(c(0:9)[apply(sampled_labels[1,,], 1, which.max)],
	collapse = "")
	print(resp)

	# Apply generator
	generated_images <- predict(
	generator,
	list(noise, sampled_labels)
	)

	img <- generated_images[1,1,,]
	plot(as.raster((img+1)/2))
	}

	generate_image()

	report <- function(generator_train_loss, generator_test_loss,
	discriminator_train_loss, discriminator_test_loss) {
	# Generate an epoch report on performance
	row_fmt <- "\n%22s : loss %4.2f \| %5.2f \| %5.2f"
	row_fmt2 <- "\n%22s : loss %4.2f \| %5.2f \| %5.2f \| %5.2f \| %5.2f"
	cat(sprintf(
	row_fmt,
	"generator (train)",
	generator_train_loss[1],
	generator_train_loss[2],
	generator_train_loss[3]
	))
	cat(sprintf(
	row_fmt,
	"generator (test)",
	generator_test_loss[1],
	generator_test_loss[2],
	generator_test_loss[3]
	))

	cat(sprintf(
	row_fmt2,
	"discriminator (train)",
	discriminator_train_loss[1],
	discriminator_train_loss[2],
	discriminator_train_loss[3],
	discriminator_train_loss[4],
	discriminator_train_loss[6]
	))

	cat(sprintf(
	row_fmt2,
	"discriminator (test)",
	discriminator_test_loss[1],
	discriminator_test_loss[2],
	discriminator_test_loss[3],
	discriminator_test_loss[4],
	discriminator_test_loss[6]
	))

	cat("\n")

	}

	# Training ----------------------------------------------------------------

	for(epoch in 1:epochs) {

	num_batches <- trunc(num_train / batch_size)
	pb <- progress::progress_bar$new(
	total = num_batches,
	format = sprintf("epoch %s/%s :elapsed [:bar] :percent :eta", epoch, epochs),
	clear = FALSE
	)

	epoch_gen_loss <- NULL
	epoch_disc_loss <- NULL
	possible_indexes <- 1:num_train

	for(index in 1:num_batches) {

	pb$tick()

	# Generate a new batch of noise
	noise <- rnorm(4 * 10 * batch_size, 0, 0.1) %>%
	array(dim = c(batch_size, 4, 10))

	# Get a batch of real images
	batch <- sample(possible_indexes, size = batch_size)
	possible_indexes <- possible_indexes[!possible_indexes %in% batch]
	image_batch <- mnist$train$x[batch,,,,drop = FALSE]
	label_batch <- mnist$train$y[batch,,,drop = FALSE]

	# Sample some labels from p_c
	sampled_labels <- sample(0:9, batch_size * 4, replace = TRUE) %>%
	matrix(ncol = 4) %>%
	plyr::aaply(1, transform_to_matrix)

	# Generate a batch of fake images, using the generated labels as a
	# conditioner. We reshape the sampled labels to be
	# (batch_size, 1) so that we can feed them into the embedding
	# layer as a length one sequence
	generated_images <- predict(generator, list(noise, sampled_labels))

	X <- abind::abind(image_batch, generated_images, along = 1)
	y <- matrix(c(rep(1L, batch_size), rep(0L, batch_size)), ncol = 1)
	aux_y <- abind::abind(label_batch, sampled_labels, along = 1)

	# Check if the discriminator can figure itself out
	disc_loss <- train_on_batch(discriminator, x = X, y = list(y, aux_y))
	epoch_disc_loss <- rbind(epoch_disc_loss, unlist(disc_loss))

	# Make new noise. Generate 2 * batch size here such that
	# the generator optimizes over an identical number of images as the
	# discriminator
	noise <- rnorm(2 * 4 * 10 * batch_size, 0, 0.1) %>%
	array(dim = c(2 * batch_size, 4, 10))
	sampled_labels <- sample(0:9, 2 * batch_size * 4, replace = TRUE) %>%
	matrix(ncol = 4) %>%
	plyr::aaply(1, transform_to_matrix)

	# Want to train the generator to trick the discriminator
	# For the generator, we want all the {fake, not-fake} labels to say
	# not-fake
	trick <- matrix(rep(1, 2 * batch_size), ncol = 1)

	combined_loss <- train_on_batch(
	object = combined,
	x = list(noise, sampled_labels),
	y = list(trick, sampled_labels)
	)

	epoch_gen_loss <- rbind(epoch_gen_loss, unlist(combined_loss))

	}

	cat(sprintf("\nTesting for epoch %02d:", epoch))

	# Evaluate the testing loss here
	# Generate a new batch of noise
	noise <- rnorm(4 * 10 * num_test, 0, 0.1) %>%
	array(dim = c(num_test, 4, 10))

	# Sample some labels from p_c and generate images from them
	sampled_labels <- sample(0:9, num_test * 4, replace = TRUE) %>%
	matrix(ncol = 4) %>%
	plyr::aaply(1, transform_to_matrix)

	generated_images <- predict(generator, list(noise, sampled_labels))

	X <- abind::abind(mnist$test$x, generated_images, along = 1)
	y <- matrix(c(rep(1, num_test), rep(0, num_test)), ncol = 1)
	aux_y <- abind::abind(mnist$test$y, sampled_labels, along = 1)

	# See if the discriminator can figure itself out...
	discriminator_test_loss <- evaluate(
	discriminator, X, list(y, aux_y),
	verbose = FALSE
	) %>% unlist()

	discriminator_train_loss <- apply(epoch_disc_loss, 2, mean)

	# Make new noise
	noise <- rnorm(2 * 4 * 10 * num_test, 0, 0.1) %>%
	array(dim = c(2 * num_test, 4, 10))
	sampled_labels <- sample(0:9, 2 * num_test * 4, replace = TRUE) %>%
	matrix(ncol = 4) %>%
	plyr::aaply(1, transform_to_matrix)

	trick <- matrix(rep(1, 2 * num_test), ncol = 1)

	generator_test_loss = combined %>%
	evaluate(
	list(noise, sampled_labels),
	list(trick, sampled_labels),
	verbose = FALSE
	)

	generator_train_loss <- apply(epoch_gen_loss, 2, mean)

	report(
	generator_train_loss,
	generator_test_loss,
	discriminator_train_loss,
	discriminator_test_loss
	)

	generate_image()

	}