Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Quebrando Captchas - Parte VI: Redes Generativas Adversariais com Classificador Auxiliar - CODE
#' Author: Julio Trecenti
#' Subject: Mnist-Captcha AC-GAN, based on keras example
#' https://tensorflow.rstudio.com/keras/articles/examples/mnist_acgan.html
# library(tidyverse)
library(magrittr)
library(keras)
k_set_image_data_format('channels_first')
# Data preparation -------------------------------------------------------
mnist <- dataset_mnist()
one_hot <- function(x) {
labs_em_fator <- factor(x, levels = 0:9)
m <- model.matrix(~labs_em_fator - 1)
attributes(m) <- list(dim = dim(m))
keras::array_reshape(m, c(dim(m)[1], 1, dim(m)[2]))
}
juntar_xs <- function(x) {
rd <- seq_len(nrow(x)) %% 4
a <- abind::abind(
x[rd==3,,], x[rd==2,,], x[rd==1,,], x[rd==0,,],
along = 3
)
attributes(a) <- list(dim = dim(a))
a
}
juntar_ys <- function(y) {
rd <- seq_along(y) %% 4
a <- abind::abind(
one_hot(y[rd==3]), one_hot(y[rd==2]), one_hot(y[rd==1]), one_hot(y[rd==0]),
along = 2
)
attributes(a) <- list(dim = dim(a))
a
}
mnist$train$x <- juntar_xs(mnist$train$x)
mnist$train$y <- juntar_ys(mnist$train$y)
mnist$test$x <- juntar_xs(mnist$test$x)
mnist$test$y <- juntar_ys(mnist$test$y)
num_train <- dim(mnist$train$x)[1]
num_test <- dim(mnist$test$x)[1]
# from -1 to 1
mnist$train$x <- (mnist$train$x - 127.5)/127.5
mnist$test$x <- (mnist$test$x - 127.5)/127.5
mnist$train$x <- array_reshape(mnist$train$x, c(15000, 1, 28, 28*4))
mnist$test$x <- array_reshape(mnist$test$x, c(2500, 1, 28, 28*4))
# Functions ---------------------------------------------------------------
build_generator <- function(){
# We will map a pair of (z, L), where z is a latent vector and L is a
# label drawn from P_c, to image space (..., 1, 28, 28)
# cnn <- keras_model_sequential()
# This is the z space commonly referred to in GAN papers
latent <- layer_input(c(4, 10), name = "noise")
# This will be our label
image_class <- layer_input(c(4L, 10L), name = "sampled_labels")
input <- layer_add(list(latent, image_class))
# output <- input %>%
#
# layer_flatten() %>%
# layer_dense(32, activation = "tanh") %>%
# layer_dense(32 * 7 * 7, activation = "tanh") %>%
# layer_reshape(c(32, 7, 7)) %>%
#
# layer_upsampling_2d(size = c(2,2)) %>%
# layer_conv_2d(16, 3, padding = "same") %>%
# layer_activation_leaky_relu() %>%
#
# layer_upsampling_2d(size = c(2,2)) %>%
# layer_conv_2d(8, 3, padding = "same") %>%
# layer_activation_leaky_relu() %>%
#
# layer_upsampling_2d(size = c(1,2)) %>%
# layer_conv_2d(4, 3, padding = "same") %>%
# layer_activation_leaky_relu() %>%
#
# layer_conv_2d(1, 3, padding = "same", activation = "tanh")
output <- input %>%
layer_flatten() %>%
layer_dense(4 * 7 * 14, activation = "tanh") %>%
layer_reshape(c(4, 7, 14)) %>%
layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>%
layer_activation_leaky_relu() %>%
layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>%
layer_activation_leaky_relu() %>%
layer_conv_2d_transpose(32, 3, padding = "same", strides = c(1, 2)) %>%
layer_activation_leaky_relu() %>%
# Take a channel axis reduction
layer_conv_2d( 1, 3, padding = "same", activation = "tanh")
# 36 classes in mnist
# cls <- image_class %>%
# layer_embedding(
# input_dim = 2 * 10,
# output_dim = latent_size,
# embeddings_initializer = 'glorot_normal'
# )
# Hadamard product between z-space and a class conditional embedding]
# latent_reshape <- latent %>%
# layer_reshape(c(2 * 10, latent_size))
# h <- layer_multiply(list(latent, cls))
keras_model(list(latent, image_class), output)
}
build_discriminator <- function() {
# Build a relatively standard conv net, with LeakyReLUs as suggested in
# the reference paper
image <- layer_input(shape = c(1, 28, 28*4))
output <- image %>%
layer_conv_2d(32, 5, padding = "same") %>%
layer_activation_leaky_relu() %>%
layer_max_pooling_2d() %>%
layer_conv_2d(64, 5, padding = "same") %>%
layer_activation_leaky_relu() %>%
layer_max_pooling_2d() %>%
layer_conv_2d(64, 3, padding = "same") %>%
layer_activation_leaky_relu() %>%
layer_flatten() %>%
layer_dense(64, activation = "relu") %>%
layer_dropout(0.2) %>%
layer_dense(128, activation = "relu")
cnn <- keras_model(image, output)
features <- cnn(image)
# First output (name=generation) is whether or not the discriminator
# thinks the image that is being shown is fake, and the second output
# (name=auxiliary) is the class that the discriminator thinks the image
# belongs to.
fake <- features %>%
layer_dense(32, activation = "tanh") %>%
layer_dense(1, activation = "sigmoid", name = "generation")
aux <- features %>%
layer_dense(4 * 10, activation = "relu") %>%
layer_reshape(c(4, 10)) %>%
layer_activation("softmax", name = "auxiliary")
keras_model(image, list(fake, aux))
}
# Parameters --------------------------------------------------------------
# Batch and latent size taken from the paper
epochs <- 50
batch_size <- 100
adam_lr <- 0.0008
# Model Definition --------------------------------------------------------
# Build the discriminator
discriminator <- build_discriminator()
discriminator %>% compile(
optimizer = optimizer_adam(lr = adam_lr),
loss = list("binary_crossentropy", "categorical_crossentropy"),
metrics = list("accuracy", "accuracy")
)
# Build the generator
generator <- build_generator()
generator %>% compile(
optimizer = optimizer_adam(lr = adam_lr),
loss = "binary_crossentropy"
)
latent <- layer_input(shape = list(4L, 10L))
image_class <- layer_input(shape = list(4L, 10L))
fake <- generator(list(latent, image_class))
# Only want to be able to train generation for the combined model
freeze_weights(discriminator)
results <- discriminator(fake)
combined <- keras_model(list(latent, image_class), results)
combined %>% compile(
optimizer = optimizer_adam(lr = adam_lr),
loss = list("binary_crossentropy", "categorical_crossentropy")
)
combined
# Helper Functions -----------------------------------------------------------
transform_to_matrix <- function(x) {
labs_em_fator <- factor(x, levels = 0:9)
m <- model.matrix(~labs_em_fator - 1)
attributes(m) <- list(dim = c(4, 10))
m
}
generate_image <- function(num = sample(0:9, 4)) {
# Noise
noise <- rnorm(4 * 10, 0, .01) %>%
array(dim = c(4, 10)) %>%
array_reshape(c(1, dim(.)))
# Digits
sampled_labels <- num %>%
matrix(ncol = 4) %>%
transform_to_matrix() %>%
array_reshape(c(1, dim(.)))
resp <- paste(c(0:9)[apply(sampled_labels[1,,], 1, which.max)],
collapse = "")
print(resp)
# Apply generator
generated_images <- predict(
generator,
list(noise, sampled_labels)
)
img <- generated_images[1,1,,]
plot(as.raster((img+1)/2))
}
generate_image()
report <- function(generator_train_loss, generator_test_loss,
discriminator_train_loss, discriminator_test_loss) {
# Generate an epoch report on performance
row_fmt <- "\n%22s : loss %4.2f | %5.2f | %5.2f"
row_fmt2 <- "\n%22s : loss %4.2f | %5.2f | %5.2f | %5.2f | %5.2f"
cat(sprintf(
row_fmt,
"generator (train)",
generator_train_loss[1],
generator_train_loss[2],
generator_train_loss[3]
))
cat(sprintf(
row_fmt,
"generator (test)",
generator_test_loss[1],
generator_test_loss[2],
generator_test_loss[3]
))
cat(sprintf(
row_fmt2,
"discriminator (train)",
discriminator_train_loss[1],
discriminator_train_loss[2],
discriminator_train_loss[3],
discriminator_train_loss[4],
discriminator_train_loss[6]
))
cat(sprintf(
row_fmt2,
"discriminator (test)",
discriminator_test_loss[1],
discriminator_test_loss[2],
discriminator_test_loss[3],
discriminator_test_loss[4],
discriminator_test_loss[6]
))
cat("\n")
}
# Training ----------------------------------------------------------------
for(epoch in 1:epochs) {
num_batches <- trunc(num_train / batch_size)
pb <- progress::progress_bar$new(
total = num_batches,
format = sprintf("epoch %s/%s :elapsed [:bar] :percent :eta", epoch, epochs),
clear = FALSE
)
epoch_gen_loss <- NULL
epoch_disc_loss <- NULL
possible_indexes <- 1:num_train
for(index in 1:num_batches) {
pb$tick()
# Generate a new batch of noise
noise <- rnorm(4 * 10 * batch_size, 0, 0.1) %>%
array(dim = c(batch_size, 4, 10))
# Get a batch of real images
batch <- sample(possible_indexes, size = batch_size)
possible_indexes <- possible_indexes[!possible_indexes %in% batch]
image_batch <- mnist$train$x[batch,,,,drop = FALSE]
label_batch <- mnist$train$y[batch,,,drop = FALSE]
# Sample some labels from p_c
sampled_labels <- sample(0:9, batch_size * 4, replace = TRUE) %>%
matrix(ncol = 4) %>%
plyr::aaply(1, transform_to_matrix)
# Generate a batch of fake images, using the generated labels as a
# conditioner. We reshape the sampled labels to be
# (batch_size, 1) so that we can feed them into the embedding
# layer as a length one sequence
generated_images <- predict(generator, list(noise, sampled_labels))
X <- abind::abind(image_batch, generated_images, along = 1)
y <- matrix(c(rep(1L, batch_size), rep(0L, batch_size)), ncol = 1)
aux_y <- abind::abind(label_batch, sampled_labels, along = 1)
# Check if the discriminator can figure itself out
disc_loss <- train_on_batch(discriminator, x = X, y = list(y, aux_y))
epoch_disc_loss <- rbind(epoch_disc_loss, unlist(disc_loss))
# Make new noise. Generate 2 * batch size here such that
# the generator optimizes over an identical number of images as the
# discriminator
noise <- rnorm(2 * 4 * 10 * batch_size, 0, 0.1) %>%
array(dim = c(2 * batch_size, 4, 10))
sampled_labels <- sample(0:9, 2 * batch_size * 4, replace = TRUE) %>%
matrix(ncol = 4) %>%
plyr::aaply(1, transform_to_matrix)
# Want to train the generator to trick the discriminator
# For the generator, we want all the {fake, not-fake} labels to say
# not-fake
trick <- matrix(rep(1, 2 * batch_size), ncol = 1)
combined_loss <- train_on_batch(
object = combined,
x = list(noise, sampled_labels),
y = list(trick, sampled_labels)
)
epoch_gen_loss <- rbind(epoch_gen_loss, unlist(combined_loss))
}
cat(sprintf("\nTesting for epoch %02d:", epoch))
# Evaluate the testing loss here
# Generate a new batch of noise
noise <- rnorm(4 * 10 * num_test, 0, 0.1) %>%
array(dim = c(num_test, 4, 10))
# Sample some labels from p_c and generate images from them
sampled_labels <- sample(0:9, num_test * 4, replace = TRUE) %>%
matrix(ncol = 4) %>%
plyr::aaply(1, transform_to_matrix)
generated_images <- predict(generator, list(noise, sampled_labels))
X <- abind::abind(mnist$test$x, generated_images, along = 1)
y <- matrix(c(rep(1, num_test), rep(0, num_test)), ncol = 1)
aux_y <- abind::abind(mnist$test$y, sampled_labels, along = 1)
# See if the discriminator can figure itself out...
discriminator_test_loss <- evaluate(
discriminator, X, list(y, aux_y),
verbose = FALSE
) %>% unlist()
discriminator_train_loss <- apply(epoch_disc_loss, 2, mean)
# Make new noise
noise <- rnorm(2 * 4 * 10 * num_test, 0, 0.1) %>%
array(dim = c(2 * num_test, 4, 10))
sampled_labels <- sample(0:9, 2 * num_test * 4, replace = TRUE) %>%
matrix(ncol = 4) %>%
plyr::aaply(1, transform_to_matrix)
trick <- matrix(rep(1, 2 * num_test), ncol = 1)
generator_test_loss = combined %>%
evaluate(
list(noise, sampled_labels),
list(trick, sampled_labels),
verbose = FALSE
)
generator_train_loss <- apply(epoch_gen_loss, 2, mean)
report(
generator_train_loss,
generator_test_loss,
discriminator_train_loss,
discriminator_test_loss
)
generate_image()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.