Last active
March 11, 2019 01:20
-
-
Save jtrecenti/149fc2ae4fe66652624f2c37f11d9286 to your computer and use it in GitHub Desktop.
Quebrando Captchas - Parte VI: Redes Generativas Adversariais com Classificador Auxiliar - CODE
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' Author: Julio Trecenti | |
#' Subject: Mnist-Captcha AC-GAN, based on keras example | |
#' https://tensorflow.rstudio.com/keras/articles/examples/mnist_acgan.html | |
# library(tidyverse) | |
library(magrittr) | |
library(keras) | |
k_set_image_data_format('channels_first') | |
# Data preparation ------------------------------------------------------- | |
mnist <- dataset_mnist() | |
one_hot <- function(x) { | |
labs_em_fator <- factor(x, levels = 0:9) | |
m <- model.matrix(~labs_em_fator - 1) | |
attributes(m) <- list(dim = dim(m)) | |
keras::array_reshape(m, c(dim(m)[1], 1, dim(m)[2])) | |
} | |
juntar_xs <- function(x) { | |
rd <- seq_len(nrow(x)) %% 4 | |
a <- abind::abind( | |
x[rd==3,,], x[rd==2,,], x[rd==1,,], x[rd==0,,], | |
along = 3 | |
) | |
attributes(a) <- list(dim = dim(a)) | |
a | |
} | |
juntar_ys <- function(y) { | |
rd <- seq_along(y) %% 4 | |
a <- abind::abind( | |
one_hot(y[rd==3]), one_hot(y[rd==2]), one_hot(y[rd==1]), one_hot(y[rd==0]), | |
along = 2 | |
) | |
attributes(a) <- list(dim = dim(a)) | |
a | |
} | |
mnist$train$x <- juntar_xs(mnist$train$x) | |
mnist$train$y <- juntar_ys(mnist$train$y) | |
mnist$test$x <- juntar_xs(mnist$test$x) | |
mnist$test$y <- juntar_ys(mnist$test$y) | |
num_train <- dim(mnist$train$x)[1] | |
num_test <- dim(mnist$test$x)[1] | |
# from -1 to 1 | |
mnist$train$x <- (mnist$train$x - 127.5)/127.5 | |
mnist$test$x <- (mnist$test$x - 127.5)/127.5 | |
mnist$train$x <- array_reshape(mnist$train$x, c(15000, 1, 28, 28*4)) | |
mnist$test$x <- array_reshape(mnist$test$x, c(2500, 1, 28, 28*4)) | |
# Functions --------------------------------------------------------------- | |
build_generator <- function(){ | |
# We will map a pair of (z, L), where z is a latent vector and L is a | |
# label drawn from P_c, to image space (..., 1, 28, 28) | |
# cnn <- keras_model_sequential() | |
# This is the z space commonly referred to in GAN papers | |
latent <- layer_input(c(4, 10), name = "noise") | |
# This will be our label | |
image_class <- layer_input(c(4L, 10L), name = "sampled_labels") | |
input <- layer_add(list(latent, image_class)) | |
# output <- input %>% | |
# | |
# layer_flatten() %>% | |
# layer_dense(32, activation = "tanh") %>% | |
# layer_dense(32 * 7 * 7, activation = "tanh") %>% | |
# layer_reshape(c(32, 7, 7)) %>% | |
# | |
# layer_upsampling_2d(size = c(2,2)) %>% | |
# layer_conv_2d(16, 3, padding = "same") %>% | |
# layer_activation_leaky_relu() %>% | |
# | |
# layer_upsampling_2d(size = c(2,2)) %>% | |
# layer_conv_2d(8, 3, padding = "same") %>% | |
# layer_activation_leaky_relu() %>% | |
# | |
# layer_upsampling_2d(size = c(1,2)) %>% | |
# layer_conv_2d(4, 3, padding = "same") %>% | |
# layer_activation_leaky_relu() %>% | |
# | |
# layer_conv_2d(1, 3, padding = "same", activation = "tanh") | |
output <- input %>% | |
layer_flatten() %>% | |
layer_dense(4 * 7 * 14, activation = "tanh") %>% | |
layer_reshape(c(4, 7, 14)) %>% | |
layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>% | |
layer_activation_leaky_relu() %>% | |
layer_conv_2d_transpose(64, 3, padding = "same", strides = c(2, 2)) %>% | |
layer_activation_leaky_relu() %>% | |
layer_conv_2d_transpose(32, 3, padding = "same", strides = c(1, 2)) %>% | |
layer_activation_leaky_relu() %>% | |
# Take a channel axis reduction | |
layer_conv_2d( 1, 3, padding = "same", activation = "tanh") | |
# 36 classes in mnist | |
# cls <- image_class %>% | |
# layer_embedding( | |
# input_dim = 2 * 10, | |
# output_dim = latent_size, | |
# embeddings_initializer = 'glorot_normal' | |
# ) | |
# Hadamard product between z-space and a class conditional embedding] | |
# latent_reshape <- latent %>% | |
# layer_reshape(c(2 * 10, latent_size)) | |
# h <- layer_multiply(list(latent, cls)) | |
keras_model(list(latent, image_class), output) | |
} | |
build_discriminator <- function() { | |
# Build a relatively standard conv net, with LeakyReLUs as suggested in | |
# the reference paper | |
image <- layer_input(shape = c(1, 28, 28*4)) | |
output <- image %>% | |
layer_conv_2d(32, 5, padding = "same") %>% | |
layer_activation_leaky_relu() %>% | |
layer_max_pooling_2d() %>% | |
layer_conv_2d(64, 5, padding = "same") %>% | |
layer_activation_leaky_relu() %>% | |
layer_max_pooling_2d() %>% | |
layer_conv_2d(64, 3, padding = "same") %>% | |
layer_activation_leaky_relu() %>% | |
layer_flatten() %>% | |
layer_dense(64, activation = "relu") %>% | |
layer_dropout(0.2) %>% | |
layer_dense(128, activation = "relu") | |
cnn <- keras_model(image, output) | |
features <- cnn(image) | |
# First output (name=generation) is whether or not the discriminator | |
# thinks the image that is being shown is fake, and the second output | |
# (name=auxiliary) is the class that the discriminator thinks the image | |
# belongs to. | |
fake <- features %>% | |
layer_dense(32, activation = "tanh") %>% | |
layer_dense(1, activation = "sigmoid", name = "generation") | |
aux <- features %>% | |
layer_dense(4 * 10, activation = "relu") %>% | |
layer_reshape(c(4, 10)) %>% | |
layer_activation("softmax", name = "auxiliary") | |
keras_model(image, list(fake, aux)) | |
} | |
# Parameters -------------------------------------------------------------- | |
# Batch and latent size taken from the paper | |
epochs <- 50 | |
batch_size <- 100 | |
adam_lr <- 0.0008 | |
# Model Definition -------------------------------------------------------- | |
# Build the discriminator | |
discriminator <- build_discriminator() | |
discriminator %>% compile( | |
optimizer = optimizer_adam(lr = adam_lr), | |
loss = list("binary_crossentropy", "categorical_crossentropy"), | |
metrics = list("accuracy", "accuracy") | |
) | |
# Build the generator | |
generator <- build_generator() | |
generator %>% compile( | |
optimizer = optimizer_adam(lr = adam_lr), | |
loss = "binary_crossentropy" | |
) | |
latent <- layer_input(shape = list(4L, 10L)) | |
image_class <- layer_input(shape = list(4L, 10L)) | |
fake <- generator(list(latent, image_class)) | |
# Only want to be able to train generation for the combined model | |
freeze_weights(discriminator) | |
results <- discriminator(fake) | |
combined <- keras_model(list(latent, image_class), results) | |
combined %>% compile( | |
optimizer = optimizer_adam(lr = adam_lr), | |
loss = list("binary_crossentropy", "categorical_crossentropy") | |
) | |
combined | |
# Helper Functions ----------------------------------------------------------- | |
transform_to_matrix <- function(x) { | |
labs_em_fator <- factor(x, levels = 0:9) | |
m <- model.matrix(~labs_em_fator - 1) | |
attributes(m) <- list(dim = c(4, 10)) | |
m | |
} | |
generate_image <- function(num = sample(0:9, 4)) { | |
# Noise | |
noise <- rnorm(4 * 10, 0, .01) %>% | |
array(dim = c(4, 10)) %>% | |
array_reshape(c(1, dim(.))) | |
# Digits | |
sampled_labels <- num %>% | |
matrix(ncol = 4) %>% | |
transform_to_matrix() %>% | |
array_reshape(c(1, dim(.))) | |
resp <- paste(c(0:9)[apply(sampled_labels[1,,], 1, which.max)], | |
collapse = "") | |
print(resp) | |
# Apply generator | |
generated_images <- predict( | |
generator, | |
list(noise, sampled_labels) | |
) | |
img <- generated_images[1,1,,] | |
plot(as.raster((img+1)/2)) | |
} | |
generate_image() | |
report <- function(generator_train_loss, generator_test_loss, | |
discriminator_train_loss, discriminator_test_loss) { | |
# Generate an epoch report on performance | |
row_fmt <- "\n%22s : loss %4.2f | %5.2f | %5.2f" | |
row_fmt2 <- "\n%22s : loss %4.2f | %5.2f | %5.2f | %5.2f | %5.2f" | |
cat(sprintf( | |
row_fmt, | |
"generator (train)", | |
generator_train_loss[1], | |
generator_train_loss[2], | |
generator_train_loss[3] | |
)) | |
cat(sprintf( | |
row_fmt, | |
"generator (test)", | |
generator_test_loss[1], | |
generator_test_loss[2], | |
generator_test_loss[3] | |
)) | |
cat(sprintf( | |
row_fmt2, | |
"discriminator (train)", | |
discriminator_train_loss[1], | |
discriminator_train_loss[2], | |
discriminator_train_loss[3], | |
discriminator_train_loss[4], | |
discriminator_train_loss[6] | |
)) | |
cat(sprintf( | |
row_fmt2, | |
"discriminator (test)", | |
discriminator_test_loss[1], | |
discriminator_test_loss[2], | |
discriminator_test_loss[3], | |
discriminator_test_loss[4], | |
discriminator_test_loss[6] | |
)) | |
cat("\n") | |
} | |
# Training ---------------------------------------------------------------- | |
for(epoch in 1:epochs) { | |
num_batches <- trunc(num_train / batch_size) | |
pb <- progress::progress_bar$new( | |
total = num_batches, | |
format = sprintf("epoch %s/%s :elapsed [:bar] :percent :eta", epoch, epochs), | |
clear = FALSE | |
) | |
epoch_gen_loss <- NULL | |
epoch_disc_loss <- NULL | |
possible_indexes <- 1:num_train | |
for(index in 1:num_batches) { | |
pb$tick() | |
# Generate a new batch of noise | |
noise <- rnorm(4 * 10 * batch_size, 0, 0.1) %>% | |
array(dim = c(batch_size, 4, 10)) | |
# Get a batch of real images | |
batch <- sample(possible_indexes, size = batch_size) | |
possible_indexes <- possible_indexes[!possible_indexes %in% batch] | |
image_batch <- mnist$train$x[batch,,,,drop = FALSE] | |
label_batch <- mnist$train$y[batch,,,drop = FALSE] | |
# Sample some labels from p_c | |
sampled_labels <- sample(0:9, batch_size * 4, replace = TRUE) %>% | |
matrix(ncol = 4) %>% | |
plyr::aaply(1, transform_to_matrix) | |
# Generate a batch of fake images, using the generated labels as a | |
# conditioner. We reshape the sampled labels to be | |
# (batch_size, 1) so that we can feed them into the embedding | |
# layer as a length one sequence | |
generated_images <- predict(generator, list(noise, sampled_labels)) | |
X <- abind::abind(image_batch, generated_images, along = 1) | |
y <- matrix(c(rep(1L, batch_size), rep(0L, batch_size)), ncol = 1) | |
aux_y <- abind::abind(label_batch, sampled_labels, along = 1) | |
# Check if the discriminator can figure itself out | |
disc_loss <- train_on_batch(discriminator, x = X, y = list(y, aux_y)) | |
epoch_disc_loss <- rbind(epoch_disc_loss, unlist(disc_loss)) | |
# Make new noise. Generate 2 * batch size here such that | |
# the generator optimizes over an identical number of images as the | |
# discriminator | |
noise <- rnorm(2 * 4 * 10 * batch_size, 0, 0.1) %>% | |
array(dim = c(2 * batch_size, 4, 10)) | |
sampled_labels <- sample(0:9, 2 * batch_size * 4, replace = TRUE) %>% | |
matrix(ncol = 4) %>% | |
plyr::aaply(1, transform_to_matrix) | |
# Want to train the generator to trick the discriminator | |
# For the generator, we want all the {fake, not-fake} labels to say | |
# not-fake | |
trick <- matrix(rep(1, 2 * batch_size), ncol = 1) | |
combined_loss <- train_on_batch( | |
object = combined, | |
x = list(noise, sampled_labels), | |
y = list(trick, sampled_labels) | |
) | |
epoch_gen_loss <- rbind(epoch_gen_loss, unlist(combined_loss)) | |
} | |
cat(sprintf("\nTesting for epoch %02d:", epoch)) | |
# Evaluate the testing loss here | |
# Generate a new batch of noise | |
noise <- rnorm(4 * 10 * num_test, 0, 0.1) %>% | |
array(dim = c(num_test, 4, 10)) | |
# Sample some labels from p_c and generate images from them | |
sampled_labels <- sample(0:9, num_test * 4, replace = TRUE) %>% | |
matrix(ncol = 4) %>% | |
plyr::aaply(1, transform_to_matrix) | |
generated_images <- predict(generator, list(noise, sampled_labels)) | |
X <- abind::abind(mnist$test$x, generated_images, along = 1) | |
y <- matrix(c(rep(1, num_test), rep(0, num_test)), ncol = 1) | |
aux_y <- abind::abind(mnist$test$y, sampled_labels, along = 1) | |
# See if the discriminator can figure itself out... | |
discriminator_test_loss <- evaluate( | |
discriminator, X, list(y, aux_y), | |
verbose = FALSE | |
) %>% unlist() | |
discriminator_train_loss <- apply(epoch_disc_loss, 2, mean) | |
# Make new noise | |
noise <- rnorm(2 * 4 * 10 * num_test, 0, 0.1) %>% | |
array(dim = c(2 * num_test, 4, 10)) | |
sampled_labels <- sample(0:9, 2 * num_test * 4, replace = TRUE) %>% | |
matrix(ncol = 4) %>% | |
plyr::aaply(1, transform_to_matrix) | |
trick <- matrix(rep(1, 2 * num_test), ncol = 1) | |
generator_test_loss = combined %>% | |
evaluate( | |
list(noise, sampled_labels), | |
list(trick, sampled_labels), | |
verbose = FALSE | |
) | |
generator_train_loss <- apply(epoch_gen_loss, 2, mean) | |
report( | |
generator_train_loss, | |
generator_test_loss, | |
discriminator_train_loss, | |
discriminator_test_loss | |
) | |
generate_image() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment