#How to Build Simple Autoencoder with Keras in R
#https://www.datatechnotes.com/2020/02/how-to-build-simple-autoencoder-with-keras-in-r.html
#Let's play with autoencoders (Keras, R)
#https://statslab.eighty20.co.za/posts/autoencoders_keras_r/
#tf.keras.utils.plot_model
#https://www.tensorflow.org/versions/r2.0/api_docs/python/tf/keras/utils/plot_model
#https://cran.r-project.org/web/packages/imager/vignettes/gettingstarted.html
#https://dahtah.github.io/imager/imager.html
#https://ryouready.wordpress.com/2014/09/12/using-colorized-png-pictograms-in-r-base-plots/
#rm(list=ls())
library(keras)
reticulate::use_python("/usr/local/bin/python", required = TRUE)
reticulate::py_config()
#Prepare the MNIST handwritten-digit dataset
Data <- dataset_mnist()
str(Data)
#Extract the arrays and rescale pixel values to [0, 1]
xtrain <- Data$train$x
xtrain <- xtrain/255
ytrain <- Data$train$y
xtest <- Data$test$x
xtest <- xtest/255
ytest <- Data$test$y
# input data size
dim(xtrain)
input_size <- dim(xtrain)[2]*dim(xtrain)[3]
input_size
#Flatten each 28 x 28 image into a 784-dimensional row of a 2D array
dim(xtrain)
dim(xtest)
x_train <- array_reshape(xtrain, dim=c(dim(xtrain)[1], input_size))
x_test <- array_reshape(xtest, dim=c(dim(xtest)[1], input_size))
print(dim(x_train))
print(dim(x_test))
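#(Added note: with MNIST these should print 60000 x 784 and 10000 x 784.)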
##Build a simple model (1)
#rm(Autoencoder1)
input1 <- layer_input(shape = input_size)
output1 <- input1 %>%
  layer_dense(units=256, activation = "relu") %>%
  layer_activation_leaky_relu() %>%
  layer_dense(units=2) %>%
  layer_activation_leaky_relu() %>%
  layer_dense(units=256, activation = "relu") %>%
  layer_activation_leaky_relu() %>%
  layer_dense(units = input_size, activation = "sigmoid") %>%
  layer_activation_leaky_relu()
Autoencoder1 <- keras_model(input1, output1)
summary(Autoencoder1)
#source("./DL_plot_modi_03.R")
source("https://gist.githubusercontent.com/kumeS/41fed511efb45bd55d468d4968b0f157/raw/07da3ba4a2e477f352d03e8b5ac00d394fe9afec/DL_plot_modi_v1.1.R")
modelplot <- Autoencoder1
modelplot %>% plot_model_modi(width=1, height=1.25)
#Use plot_model from Python TensorFlow
#reticulate::use_python("/usr/local/bin/python", required = TRUE)
tf <- reticulate::import(module = "tensorflow")
py_plot_model <- tf$keras$utils$plot_model
py_plot_model(modelplot, to_file='Autoencoder1_tf.png',
              show_shapes=TRUE, show_layer_names=TRUE,
              expand_nested=TRUE, dpi=100)
## compile & fit
Autoencoder1 %>% compile(optimizer="rmsprop", loss="mean_squared_error")
Autoencoder1 %>% fit(x_train, x_train, epochs=100, batch_size=1000)
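##Sketch, not in the original: evaluate() on the held-out test images reports
##the compiled mean-squared-error loss, a quick check of generalization.
Autoencoder1 %>% evaluate(x_test, x_test)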
#Inspect the autoencoder's reconstructions of the training set
pred_imgs1 <- Autoencoder1 %>% predict(x_train)
pred_imgsR1 <- array_reshape(pred_imgs1, dim=c(dim(pred_imgs1)[1], 28, 28))
dim(pred_imgsR1)
#if (!requireNamespace("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")
#BiocManager::install("EBImage")
library(EBImage)
par(mfrow=c(3,2))
for (i in 1:6) {
  m <- sample(1:dim(xtrain)[1], 1, replace = FALSE)
  display(combine(t(xtrain[m,,]), t(pred_imgsR1[m,,])),
          method="raster", nx=2, all=TRUE, spacing = 0.01, margin = 2)
}
##Visualize the intermediate (bottleneck) layer
intermediate_layer <- keras_model(inputs = Autoencoder1$input,
                                  outputs = get_layer(Autoencoder1, "dense_1")$output)
summary(intermediate_layer)
intermediate_output <- predict(intermediate_layer, x_train)
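#(Added note: intermediate_output is a 60000 x 2 matrix, one 2-D code per training image.)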
#2D plot of the 2-unit codes, colored by digit label
xy <- data.frame(ytrain, intermediate_output)
Sam <- sample(1:nrow(xy), 500, replace = FALSE)
xy1 <- xy[Sam,]
par(mfrow=c(1,1), mai=c(0.75,0.75,0.2,0.2), mgp = c(2.5,1,0))
plot(xy1[,2:3], pch=21, cex=0.75, bg=rainbow(10)[xy1[,1]+1])
#2D plot using the actual digit images
xy2 <- xtrain[Sam,,]
##Determine the XY extent of the whole plot
a <- range(xy1[,2][is.finite(xy1[,2])])
b <- range(xy1[,3][is.finite(xy1[,3])])
a1 <- diff(a)*0.015
b1 <- diff(b)*0.015
##Overlay the images, tinted with the class color
for(n in 1:nrow(xy1)){
  #n <- 4
  v <- col2rgb(rainbow(10)[xy1[n,1] + 1]) / 255
  img <- channel(xy2[n,,], 'rgb')
  img[,,1] <- img[,,1]*v[1]
  img[,,2] <- img[,,2]*v[2]
  img[,,3] <- img[,,3]*v[3]
  ff <- t(as.raster(img))
  ff[ff == "#000000"] <- "#00000000"
  rasterImage(ff, xy1[n,2]-a1, xy1[n,3]-b1,
              xy1[n,2]+a1, xy1[n,3]+b1)
}
#First draw an empty plot
plot(xy1[,2:3], pch=21, cex=0.1, col="white")
##Then overlay the images, tinted with the class color
for(n in 1:nrow(xy1)){
  #n <- 4
  v <- col2rgb(rainbow(10)[xy1[n,1] + 1]) / 255
  img <- channel(xy2[n,,], 'rgb')
  img[,,1] <- img[,,1]*v[1]
  img[,,2] <- img[,,2]*v[2]
  img[,,3] <- img[,,3]*v[3]
  ff <- t(as.raster(img))
  ff[ff == "#000000"] <- "#00000000"
  rasterImage(ff, xy1[n,2]-1.5, xy1[n,3]-1.5,
              xy1[n,2]+1.5, xy1[n,3]+1.5)
}
#Test
#pred_imgs = Autoencoder1 %>% predict(x_test)
#pred_imgsR = array_reshape(pred_imgs, dim=c(dim(pred_imgs)[1], 28, 28))
#rasterImage(image,
#            xleft, ybottom, xright, ytop,
#            angle = 0, interpolate = TRUE)
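##Sketch, not in the original: the commented lines above made runnable for one
##test digit, mirroring the training-set display used earlier.
pred_test1 <- Autoencoder1 %>% predict(x_test)
pred_testR1 <- array_reshape(pred_test1, dim=c(dim(pred_test1)[1], 28, 28))
display(combine(t(xtest[1,,]), t(pred_testR1[1,,])),
        method="raster", nx=2, all=TRUE, spacing = 0.01, margin = 2)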
##02
##It is safest to restart the R session once here
##(plain R objects such as x_train survive an RStudio restart; Keras model objects do not)
.rs.restartR()
library(keras)
library(EBImage)
reticulate::use_python("/usr/local/bin/python", required = TRUE)
##Build an alternative model
input2 <- layer_input(shape = input_size)
output2 <- input2 %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 128, activation = "relu") %>%
  layer_dropout(rate = 0.1) %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dense(units = 2, activation = "relu") %>%
  layer_dense(units = 64, activation = "relu") %>%
  layer_dropout(rate = 0.1) %>%
  layer_dense(units = 128, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 256, activation = "relu") %>%
  layer_dense(units = input_size, activation = "relu")
Autoencoder2 <- keras_model(input2, output2)
summary(Autoencoder2)
#source("./DL_plot_modi_03.R")
modelplot <- Autoencoder2
modelplot %>% plot_model_modi(width=1, height=1.25)
#Use plot_model from Python TensorFlow
#reticulate::use_python("/usr/local/bin/python", required = TRUE)
tf <- reticulate::import(module = "tensorflow")
py_plot_model <- tf$keras$utils$plot_model
py_plot_model(modelplot, to_file='Autoencoder2_tf.png',
              show_shapes=TRUE, show_layer_names=TRUE,
              expand_nested=TRUE, dpi=100)
#compile
Autoencoder2 %>%
  compile(optimizer="rmsprop", loss="mean_squared_error")
#Fit
Autoencoder2 %>%
  fit(x_train, x_train, epochs=100, batch_size=1000, shuffle=TRUE)
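##Sketch, not in the original: capturing the history object, optionally with a
##validation_split, makes the loss curve easy to plot.
# history2 <- Autoencoder2 %>%
#   fit(x_train, x_train, epochs=100, batch_size=1000,
#       shuffle=TRUE, validation_split=0.1)
# plot(history2)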
#Inspect the autoencoder's reconstructions of the training set
pred_imgs2 <- Autoencoder2 %>% predict(x_train)
pred_imgsR2 <- array_reshape(pred_imgs2, dim=c(dim(pred_imgs2)[1], 28, 28))
dim(pred_imgsR2)
#if (!requireNamespace("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")
#BiocManager::install("EBImage")
library(EBImage)
par(mfrow=c(3,2))
for (i in 1:6) {
  m <- sample(1:dim(xtrain)[1], 1, replace = FALSE)
  display(combine(t(xtrain[m,,]), t(pred_imgsR2[m,,])),
          method="raster", nx=2, all=TRUE, spacing = 0.01, margin = 2)
}
##Visualize the intermediate (bottleneck) layer
summary(Autoencoder2)
intermediate_layer <- keras_model(inputs = Autoencoder2$input,
                                  outputs = get_layer(Autoencoder2, "dense_3")$output)
summary(intermediate_layer)
intermediate_output <- predict(intermediate_layer, x_train)
#2D plot of the 2-unit codes
xy <- data.frame(ytrain, intermediate_output)
Sam <- sample(1:nrow(xy), 500, replace = FALSE)
xy1 <- xy[Sam,]
par(mfrow=c(1,1), mai=c(0.75,0.75,0.2,0.2), mgp = c(2.5,1,0))
plot(xy1[,2:3], pch=21, cex=0.75, bg=rainbow(10)[xy1[,1]+1])
#The points are clustered near zero,
#so plotting on a log10 scale spreads them out rather nicely
xy1[,2:3] <- log10(xy1[,2:3])
plot(xy1[,2:3], pch=21, cex=0.75, bg=rainbow(10)[xy1[,1]+1])
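#(Added note: relu codes of exactly zero become -Inf under log10();
# the is.finite() filtering below keeps them out of the range calculations.)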
#2D plot using the actual digit images
xy2 <- xtrain[Sam,,]
#First draw an empty plot
plot(xy1[,2:3], pch=21, cex=0.1, col="white")
##Size each thumbnail at about 3% of the plot's XY extent
a <- range(xy1[,2][is.finite(xy1[,2])])
b <- range(xy1[,3][is.finite(xy1[,3])])
a1 <- diff(a)*0.015
b1 <- diff(b)*0.015
##Then overlay the images, tinted with the class color
for(n in 1:nrow(xy1)){
  #n <- 4
  v <- col2rgb(rainbow(10)[xy1[n,1] + 1]) / 255
  img <- channel(xy2[n,,], 'rgb')
  img[,,1] <- img[,,1]*v[1]
  img[,,2] <- img[,,2]*v[2]
  img[,,3] <- img[,,3]*v[3]
  ff <- t(as.raster(img))
  ff[ff == "#000000"] <- "#00000000"
  rasterImage(ff, xy1[n,2]-a1, xy1[n,3]-b1,
              xy1[n,2]+a1, xy1[n,3]+b1)
}
##03
##It is safest to restart the R session once here as well
.rs.restartR()
library(keras)
library(EBImage)
reticulate::use_python("/usr/local/bin/python", required = TRUE)
##Build another model
#The activation function is `relu` throughout.
input3 <- layer_input(shape = input_size)
output3 <- input3 %>%
  layer_dense(units = 1000, activation = "relu") %>%
  layer_dense(units = 500, activation = "relu") %>%
  layer_dense(units = 250, activation = "relu") %>%
  layer_dense(units = 2, activation = "relu") %>%
  layer_dense(units = 250, activation = "relu") %>%
  layer_dense(units = 500, activation = "relu") %>%
  layer_dense(units = 100, activation = "relu") %>%
  layer_dense(units = input_size, activation = "relu")
Autoencoder3 <- keras_model(input3, output3)
summary(Autoencoder3)
##Belatedly, the extent of the model plot can now be specified.
modelplot <- Autoencoder3
modelplot %>% plot_model_modi(width=1, height=1.25)
#compile
Autoencoder3 %>%
  compile(optimizer="rmsprop", loss="mean_squared_error")
#Fit
Autoencoder3 %>%
  fit(x_train, x_train, epochs=100, batch_size=1000, shuffle=TRUE)
#Inspect the autoencoder's reconstructions of the training set
pred_imgs3 <- Autoencoder3 %>% predict(x_train)
pred_imgsR3 <- array_reshape(pred_imgs3, dim=c(dim(pred_imgs3)[1], 28, 28))
dim(pred_imgsR3)
#if (!requireNamespace("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")
#BiocManager::install("EBImage")
#library(EBImage)
par(mfrow=c(3,2))
for (i in 1:6) {
  m <- sample(1:dim(xtrain)[1], 1, replace = FALSE)
  display(combine(t(xtrain[m,,]), t(pred_imgsR3[m,,])),
          method="raster", nx=2, all=TRUE, spacing = 0.01, margin = 2)
}
##Visualize the intermediate (bottleneck) layer
summary(Autoencoder3)
intermediate_layer <- keras_model(inputs = Autoencoder3$input,
                                  outputs = get_layer(Autoencoder3, "dense_3")$output)
summary(intermediate_layer)
intermediate_output <- predict(intermediate_layer, x_train)
#2D plot on a log10 scale, using the actual digit images
xy <- data.frame(ytrain, intermediate_output)
Sam <- sample(1:nrow(xy), 500, replace = FALSE)
xy1 <- xy[Sam,]
xy1[,2:3] <- log10(xy1[,2:3])
xy2 <- xtrain[Sam,,]
par(mfrow=c(1,1), mai=c(0.75,0.75,0.2,0.2), mgp = c(2,1,0))
plot(xy1[,2:3], pch=21, cex=0.1, col="white")
a <- range(xy1[,2][is.finite(xy1[,2])])
b <- range(xy1[,3][is.finite(xy1[,3])])
a1 <- diff(a)*0.015
b1 <- diff(b)*0.015
for(n in 1:nrow(xy1)){
  #n <- 4
  v <- col2rgb(rainbow(10)[xy1[n,1] + 1]) / 255
  img <- channel(xy2[n,,], 'rgb')
  img[,,1] <- img[,,1]*v[1]
  img[,,2] <- img[,,2]*v[2]
  img[,,3] <- img[,,3]*v[3]
  ff <- t(as.raster(img))
  ff[ff == "#000000"] <- "#00000000"
  rasterImage(ff, xy1[n,2]-a1, xy1[n,3]-b1,
              xy1[n,2]+a1, xy1[n,3]+b1)
}
legend("topleft", legend = c(0:9), cex=0.6, pch=NA, text.col = rainbow(10), bg = "black")
#legend(locator(1), legend = c(0:9), pch=NA, text.col = rainbow(10), bg = "black")
##04
##It is safest to restart the R session once here as well
.rs.restartR()
library(keras)
library(EBImage)
reticulate::use_python("/usr/local/bin/python", required = TRUE)
##Build another model, this time with `tanh` activations
input4 <- layer_input(shape = input_size)
output4 <- input4 %>%
  layer_dense(units = 1000, activation = "tanh") %>%
  layer_dense(units = 500, activation = "tanh") %>%
  layer_dense(units = 250, activation = "tanh") %>%
  layer_dense(units = 2, activation = "tanh") %>%
  layer_dense(units = 250, activation = "tanh") %>%
  layer_dense(units = 500, activation = "tanh") %>%
  layer_dense(units = 100, activation = "tanh") %>%
  layer_dense(units = input_size, activation = "tanh")
Autoencoder4 <- keras_model(input4, output4)
summary(Autoencoder4)
##Belatedly, the extent of the model plot can now be specified.
modelplot <- Autoencoder4
modelplot %>% plot_model_modi(width=1, height=1.25)
#compile
Autoencoder4 %>%
  compile(optimizer="rmsprop", loss="mean_squared_error")
#Fit
Autoencoder4 %>%
  fit(x_train, x_train, epochs=100, batch_size=1000, shuffle=TRUE)
#Inspect the autoencoder's reconstructions of the training set
pred_imgs4 <- Autoencoder4 %>% predict(x_train)
pred_imgsR4 <- array_reshape(pred_imgs4, dim=c(dim(pred_imgs4)[1], 28, 28))
dim(pred_imgsR4)
#if (!requireNamespace("BiocManager", quietly = TRUE))
#  install.packages("BiocManager")
#BiocManager::install("EBImage")
library(EBImage)
par(mfrow=c(3,2))
for (i in 1:6) {
  m <- sample(1:dim(xtrain)[1], 1, replace = FALSE)
  display(combine(t(xtrain[m,,]), t(pred_imgsR4[m,,])),
          method="raster", nx=2, all=TRUE, spacing = 0.01, margin = 2)
}
##Visualize the intermediate (bottleneck) layer
summary(Autoencoder4)
intermediate_layer <- keras_model(inputs = Autoencoder4$input,
                                  outputs = get_layer(Autoencoder4, "dense_3")$output)
summary(intermediate_layer)
intermediate_output <- predict(intermediate_layer, x_train)
#2D plot of the 2-unit codes on a log10 scale
xy <- data.frame(ytrain, intermediate_output)
Sam <- sample(1:nrow(xy), 500, replace = FALSE)
xy1 <- xy[Sam,]
xy1[,2:3] <- log10(xy1[,2:3])
plot(xy1[,2:3], pch=21, cex=0.75, bg=rainbow(10)[xy1[,1]+1])
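#(Added note: tanh codes can be negative, so log10() yields NaNs here with a
# warning; the is.finite() filtering below drops them.)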
#2D plot using the actual digit images
xy2 <- xtrain[Sam,,]
#First draw an empty plot
plot(xy1[,2:3], pch=21, cex=0.1, col="white")
##Size each thumbnail at about 3% of the plot's XY extent
a <- range(xy1[,2][is.finite(xy1[,2])])
b <- range(xy1[,3][is.finite(xy1[,3])])
a1 <- diff(a)*0.015
b1 <- diff(b)*0.015
##Then overlay the images, tinted with the class color
for(n in 1:nrow(xy1)){
  #n <- 4
  v <- col2rgb(rainbow(10)[xy1[n,1] + 1]) / 255
  img <- channel(xy2[n,,], 'rgb')
  img[,,1] <- img[,,1]*v[1]
  img[,,2] <- img[,,2]*v[2]
  img[,,3] <- img[,,3]*v[3]
  ff <- t(as.raster(img))
  ff[ff == "#000000"] <- "#00000000"
  rasterImage(ff, xy1[n,2]-a1, xy1[n,3]-b1,
              xy1[n,2]+a1, xy1[n,3]+b1)
}
#| Model   | Final loss |
#|:-------:|:----------:|
#| Model 1 | 0.0397 |
#| Model 2 | 0.0378 |
#| Model 3 | 0.0358 |
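##Sketch, not in the original: each final loss above can be read back with a
##fresh evaluate() call run right after training the corresponding model, e.g.:
# Autoencoder3 %>% evaluate(x_train, x_train)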
##To retrieve all of the model weights
##(run this right after training, since model objects do not survive an R restart)
autoencoder2_weights <- Autoencoder2 %>% keras::get_weights()
str(autoencoder2_weights)
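##Sketch, assuming get_weights() returns kernel and bias arrays in layer order:
##the first element should be the 784 x 256 kernel of the first encoder layer.
dim(autoencoder2_weights[[1]])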