Created
November 19, 2020 20:28
-
-
Save kstawiski/c3dfed76406f50d5cdf78bba07cf049e to your computer and use it in GitHub Desktop.
OmicSelector DeepLearning snippets
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(OmicSelector); set.seed(1); | |
if(!dir.exists("/OmicSelector/temp")) { dir.create("/OmicSelector/temp") } | |
OmicSelector_load_extension("deeplearning") | |
library(data.table) | |
#use_session_with_seed(1) | |
#selected_miRNAs = c("hsa.miR.192.5p", | |
# "hsa.let.7g.5p", | |
# "hsa.let.7a.5p", | |
# "hsa.let.7d.5p", | |
# "hsa.miR.194.5p", | |
# "hsa.miR.98.5p", | |
# "hsa.let.7f.5p", | |
# "hsa.miR.122.5p", | |
# "hsa.miR.340.5p", | |
# "hsa.miR.26b.5p" | |
# ,"hsa.miR.17.5p", | |
# "hsa.miR.199a.3p", | |
# "hsa.miR.28.3p", | |
# "hsa.miR.92a.3p" | |
#) | |
balanced = F | |
nazwa_konfiguracji = "pancreatic_ourseq.csv" | |
t = data.table::fread("mixed_train.csv") | |
studyMirs = make.names(c('hsa-miR-192-5p', 'hsa-let-7g-5p', 'hsa-let-7a-5p', 'hsa-let-7d-5p', 'hsa-miR-194-5p', 'hsa-miR-98-5p', 'hsa-let-7f-5p', 'hsa-miR-122-5p', 'hsa-miR-340-5p', 'hsa-miR-26b-5p')) | |
norm3_1 = make.names(c('hsa-miR-17-5p', 'hsa-miR-92a-3p', 'hsa-miR-199a-3p')) | |
norm2 = make.names(c('hsa-miR-28-3p', 'hsa-miR-92a-3p')) | |
normQ = make.names('hsa-miR-23a-3p') | |
all_norm = make.names(c('hsa-miR-17-5p', 'hsa-miR-92a-3p', 'hsa-miR-199a-3p', 'hsa-miR-23a-3p', 'hsa-miR-28-3p')) | |
# study | |
selected_miRNAs = studyMirs | |
# autoencoder ma softmax na deep feature | |
hyperparameters_part1 = expand.grid(layer1 = seq(2,10, by = 1), layer2 = c(0), layer3 = c(0), | |
activation_function_layer1 = c("relu","sigmoid","selu"), activation_function_layer2 = c("relu"), activation_function_layer3 = c("relu"), | |
dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0), dropout_layer3 = c(0), | |
layer1_regularizer = c(T,F), layer2_regularizer = c(F), layer3_regularizer = c(F), | |
optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0), balanced = balanced, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T), | |
stringsAsFactors = F) | |
hyperparameters_part2 = expand.grid(layer1 = seq(3,11, by = 2), layer2 = c(seq(3,11, by = 2)), layer3 = c(seq(0,11, by = 2)), | |
activation_function_layer1 = c("relu","sigmoid","selu"), activation_function_layer2 = c("relu","sigmoid","selu"), activation_function_layer3 = c("relu","sigmoid","selu"), | |
dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0), dropout_layer3 = c(0), | |
layer1_regularizer = c(T,F), layer2_regularizer = c(F), layer3_regularizer = c(F), | |
optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0), balanced = balanced, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T,F), | |
stringsAsFactors = F) | |
hyperparameters = rbind(hyperparameters_part1, hyperparameters_part2) | |
head(hyperparameters) | |
ile = nrow(hyperparameters) | |
ile_w_batchu = 250 | |
cat(paste0("\nHow many to check: ", ile)) | |
if(!file.exists(nazwa_konfiguracji)) { | |
batch_start = 1 | |
} else { | |
tempres = data.table::fread(nazwa_konfiguracji) | |
last = as.numeric(str_split(tempres$model_id, "-")[[nrow(tempres)]][1]) | |
if(last >= ile) { stop(paste0(last, " - Task already finished.")) } | |
batch_start = as.numeric(last)+1 | |
} | |
ile_batchy = ceiling(nrow(hyperparameters)/ile_w_batchu - batch_start/ile_w_batchu) | |
cat(paste0("\nBatch start: ", batch_start)) | |
cat(paste0("\nHow many in batch: ", ile_w_batchu)) | |
for (i in 1:ile_batchy) { | |
batch_end = batch_start + (ile_w_batchu-1) | |
if (batch_end > ile) { batch_end = ile } | |
cat(paste0("\n\nProcessing batch no ", i , " of ", ile_batchy, " (", batch_start, "-", batch_end, ")")) | |
OmicSelector_deep_learning(selected_miRNAs = selected_miRNAs, wd = getwd(), save_threshold_trainacc = 0.75, save_threshold_testacc = 0.7, hyperparameters = hyperparameters, | |
SMOTE = balanced, start = batch_start, end = batch_end, output_file = nazwa_konfiguracji, keras_threads = 30, | |
keras_epoch = 2000, keras_patience = 100, automatic_weight = F) | |
batch_start = batch_end + 1 | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# remotes::install_github("kstawiski/OmicSelector", ref = "dev") | |
setwd("~/snorlax/DOKTORAT/transfer_learning/pancreatic/transfer_1") | |
library(OmicSelector) | |
library(dplyr) | |
library(ggplot2) | |
library("scatterplot3d") | |
OmicSelector_load_extension("deeplearning") | |
# Get TCGA results | |
tcga_models = data.table::fread("../tcga/pancreatic_tcga.csv") | |
ourseq_models = data.table::fread("../circulating_our/pancreatic_ourseq.csv") | |
tcga_models$task = "TCGA" | |
ourseq_models$task = "Circulating miRNA-seq" | |
tcga_models$no_layers = 1 | |
tcga_models$no_layers[tcga_models$layer2 != 0] = 2 | |
tcga_models$no_layers[tcga_models$layer3 != 0] = 3 | |
ourseq_models$no_layers = 1 | |
ourseq_models$no_layers[tcga_models$layer2 != 0] = 2 | |
ourseq_models$no_layers[tcga_models$layer3 != 0] = 3 | |
tcga_models$hyperparameter_id = 1:nrow(tcga_models) | |
ourseq_models$hyperparameter_id = 1:nrow(ourseq_models) | |
tcga_models$metaindex = (tcga_models$training_Accuracy + tcga_models$test_Accuracy + tcga_models$valid_Accuracy) / 3 | |
hist(tcga_models$metaindex) | |
summary(tcga_models$metaindex) | |
tcga_models = arrange(tcga_models, desc(metaindex)) | |
top_tcga_models = tcga_models[1:100,] | |
plot(top_tcga_models$training_Accuracy, top_tcga_models$valid_Accuracy) | |
plot(top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy) | |
plot(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy) | |
scatterplot3d(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy, angle = 45, pch = 16, | |
grid=TRUE, box=FALSE, xlab = "Trainign Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy") | |
ourseq_models$metaindex = (ourseq_models$training_Accuracy + ourseq_models$test_Accuracy + ourseq_models$valid_Accuracy) / 3 | |
hist(ourseq_models$metaindex) | |
summary(ourseq_models$metaindex) | |
ourseq_models = arrange(ourseq_models, desc(metaindex)) | |
top_ourseq_models = ourseq_models[1:100,] | |
plot(top_ourseq_models$training_Accuracy, top_ourseq_models$valid_Accuracy) | |
plot(top_ourseq_models$test_Accuracy, top_ourseq_models$valid_Accuracy) | |
plot(top_ourseq_models$training_Accuracy, top_ourseq_models$test_Accuracy) | |
scatterplot3d(top_ourseq_models$training_Accuracy, top_ourseq_models$test_Accuracy, top_ourseq_models$valid_Accuracy, angle = 45, pch = 16, | |
grid=TRUE, box=FALSE, xlab = "Trainign Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy") | |
# Which task was easier? | |
temp = rbind(tcga_models, ourseq_models) | |
boxplot(temp$metaindex ~ temp$task) | |
p <- ggplot(temp, aes(x=task, y=metaindex)) + | |
geom_violin(trim=FALSE, fill="gray") + | |
labs(title="Which task was easier?",x="Task", y = "Metaindex (avarage accuracy across datasets)")+ | |
geom_boxplot(width=0.1)+ | |
theme_classic() | |
p | |
t.test(temp$metaindex ~ temp$task) | |
wilcox.test(temp$metaindex ~ temp$task) | |
# In which task top 100 models performed better? | |
temp = rbind(top_tcga_models, top_ourseq_models) | |
boxplot(temp$metaindex ~ temp$task) | |
p <- ggplot(temp, aes(x=task, y=metaindex)) + | |
geom_violin(trim=FALSE, fill="gray") + | |
labs(title="Which task was easier?",x="Task", y = "Metaindex (avarage accuracy across datasets)")+ | |
geom_boxplot(width=0.1)+ | |
theme_classic() | |
p | |
t.test(temp$metaindex ~ temp$task) | |
wilcox.test(temp$metaindex ~ temp$task) | |
# Are there significant differences in the structure of top 100 models between tasks? | |
temp = rbind(top_tcga_models, top_ourseq_models) | |
# No layers | |
temp$no_layers = 1 | |
temp$no_layers[temp$layer2 != 0] = 2 | |
temp$no_layers[temp$layer3 != 0] = 3 | |
hist(temp$no_layers) | |
table(temp$no_layers, temp$task) | |
chisq.test(temp$no_layers, temp$task) | |
table(tcga_models$no_layers) | |
layer3_ref = 81000/nrow(tcga_models) | |
prop.test(85, 100, p=layer3_ref) | |
prop.test(80, 100, p=layer3_ref) | |
OmicSelector_correlation_plot(temp$no_layers, temp$metaindex, "Number of layers", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
## Layers | |
OmicSelector_correlation_plot(tcga_models$layer1, tcga_models$metaindex, "Layer 1", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
OmicSelector_correlation_plot(tcga_models$layer2, tcga_models$metaindex, "Layer 2", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
OmicSelector_correlation_plot(tcga_models$layer3, tcga_models$metaindex, "Layer 3", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
boxplot(tcga_models$metaindex ~ tcga_models$layer1) | |
boxplot(tcga_models$metaindex ~ tcga_models$layer2) | |
boxplot(tcga_models$metaindex ~ tcga_models$layer3) | |
OmicSelector_correlation_plot(ourseq_models$layer1, ourseq_models$metaindex, "Layer 1", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
OmicSelector_correlation_plot(ourseq_models$layer2, ourseq_models$metaindex, "Layer 2", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
OmicSelector_correlation_plot(ourseq_models$layer3, ourseq_models$metaindex, "Layer 3", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft") | |
boxplot(ourseq_models$metaindex ~ ourseq_models$layer1) | |
boxplot(ourseq_models$metaindex ~ ourseq_models$layer2) | |
boxplot(ourseq_models$metaindex ~ ourseq_models$layer3) | |
boxplot(temp$layer1 ~ temp$task) | |
t.test(temp$layer1 ~ temp$task) | |
boxplot(temp$layer2 ~ temp$task) | |
t.test(temp$layer2 ~ temp$task) | |
boxplot(temp$layer3 ~ temp$task) | |
t.test(temp$layer3 ~ temp$task) | |
## Activation function | |
table(temp$activation_function_layer1, temp$task) | |
chisq.test(temp$activation_function_layer1, temp$task) | |
table(temp$activation_function_layer2, temp$task) | |
chisq.test(temp$activation_function_layer2, temp$task) | |
table(temp$activation_function_layer3, temp$task) | |
chisq.test(temp$activation_function_layer3, temp$task) | |
## Dropout | |
boxplot(tcga_models$metaindex ~ tcga_models$dropout_layer1) | |
t.test(tcga_models$metaindex ~ tcga_models$dropout_layer1) | |
boxplot(ourseq_models$metaindex ~ ourseq_models$dropout_layer1) | |
t.test(ourseq_models$metaindex ~ ourseq_models$dropout_layer1) | |
table(temp$dropout_layer1, temp$task) | |
chisq.test(temp$dropout_layer1, temp$task) | |
## Regularization | |
boxplot(tcga_models$metaindex ~ tcga_models$layer1_regularizer) | |
t.test(tcga_models$metaindex ~ tcga_models$layer1_regularizer) | |
boxplot(ourseq_models$metaindex ~ ourseq_models$layer1_regularizer) | |
t.test(ourseq_models$metaindex ~ ourseq_models$layer1_regularizer) | |
table(temp$task, temp$layer1_regularizer) | |
chisq.test(temp$task, temp$layer1_regularizer) | |
## Optimizer | |
boxplot(tcga_models$metaindex ~ tcga_models$optimizer) | |
a = aov(tcga_models$metaindex ~ tcga_models$optimizer) | |
summary(a) | |
TukeyHSD(a) | |
boxplot(ourseq_models$metaindex ~ ourseq_models$optimizer) | |
psych::describeBy(ourseq_models$metaindex, ourseq_models$optimizer, digits = 4, mat = T) | |
a = aov(ourseq_models$metaindex ~ ourseq_models$optimizer) | |
summary(a) | |
TukeyHSD(a) | |
table(temp$task, temp$optimizer) | |
chisq.test(temp$task, temp$optimizer) | |
## Scaling | |
boxplot(tcga_models$metaindex ~ tcga_models$scaled) | |
t.test(tcga_models$metaindex ~ tcga_models$scaled) | |
boxplot(ourseq_models$metaindex ~ ourseq_models$scaled) | |
t.test(ourseq_models$metaindex ~ ourseq_models$scaled) | |
table(temp$task, temp$scaled) | |
chisq.test(temp$task, temp$scaled) | |
# Paired structure analysis | |
models_paired = top_tcga_models | |
to_join = ourseq_models[match(top_tcga_models$hyperparameter_id, ourseq_models$hyperparameter_id),] %>% dplyr::rename_all(function(x) paste0("circ_", x)) | |
models_paired = cbind(models_paired, to_join) | |
suppressMessages(library(PairedData)) | |
suppressMessages(library(profileR)) | |
pd = paired(as.numeric(models_paired$metaindex)[1:50], as.numeric(models_paired$circ_metaindex)[1:50]) | |
colnames(pd) = c("Metaindex TCGA", "Metaindex circulating miRNA-seq") | |
plot2 = OmicSelector_profileplot(pd, Method.id = models_paired$name[1:50], standardize = F) | |
plot2 | |
# Transfer network performance analysis | |
temp1 = data.table::fread("transfered_networks_structure.csv") | |
temp1$transfer_type = "structure" | |
temp2 = data.table::fread("transfered_networks_layer1.csv") | |
temp2$transfer_type = "layer1" | |
temp3 = data.table::fread("transfered_networks_layer2.csv") | |
temp3$transfer_type = "layer2" | |
temp4 = data.table::fread("transfered_networks_layer3.csv") | |
temp4$transfer_type = "layer3" | |
transfer_performance = rbind(temp1, temp2, temp3, temp4) | |
# meta = apply(expand.grid(transfer_performance$transfer_type, transfer_performance$type), 1, paste, collapse="_") | |
transfer_performance$meta = paste0(transfer_performance$transfer_type, "_", transfer_performance$type) | |
transfer_performance = transfer_performance[transfer_performance$meta != "layer1_crude",] | |
transfer_performance = transfer_performance[transfer_performance$meta != "layer2_crude",] | |
transfer_performance = transfer_performance[transfer_performance$meta != "layer3_crude",] | |
transfer_performance$meta = as.factor(transfer_performance$meta) | |
transfer_performance$meta = factor(transfer_performance$meta, levels = c("structure_crude", "layer1_recalibrated", "layer2_recalibrated", "layer3_recalibrated", "structure_recalibrated")) | |
transfer_performance$meta | |
transfer_performance$metaindex = (transfer_performance$new_training_Accuracy + transfer_performance$new_test_Accuracy + transfer_performance$new_valid_Accuracy) / 3 | |
boxplot(transfer_performance$metaindex ~ transfer_performance$meta) | |
oneway.test(transfer_performance$metaindex ~ transfer_performance$meta) | |
a = aov(transfer_performance$metaindex ~ transfer_performance$meta) | |
TukeyHSD(a) | |
psych::describeBy(transfer_performance$metaindex, transfer_performance$meta, mat = T, digits = 4) | |
merged = merge(transfer_performance, ourseq_models, | |
by = c('layer1','layer2','layer3','activation_function_layer1','activation_function_layer2','activation_function_layer3','dropout_layer1','dropout_layer2','dropout_layer3','layer1_regularizer','layer2_regularizer','layer3_regularizer','optimizer','autoencoder','balanced', 'scaled'), | |
suffixes = c("_TCGA", "_circ"), all.x = TRUE) | |
colnames(merged) | |
merged$metaindex_circ | |
merged$metaindex = (transfer_performance$new_training_Accuracy + transfer_performance$new_test_Accuracy + transfer_performance$new_valid_Accuracy) / 3 | |
# merged$metaindex11 = (merged$new_test_Accuracy + merged$new_valid_Accuracy) / 2 | |
# merged$metaindex11_circ = (merged$test_Accuracy_circ + merged$valid_Accuracy_circ ) / 2 | |
merged$metaindex11 = (merged$new_test_F1 + merged$new_valid_F1) / 2 | |
merged$metaindex11_circ = (merged$test_F1_circ + merged$valid_F1_circ ) / 2 | |
boxplot(merged$metaindex_circ, merged$metaindex) | |
t.test(merged$metaindex_circ, merged$metaindex) | |
boxplot(merged$metaindex_circ[merged$meta == "structure_recalibrated"], merged$metaindex[merged$meta == "structure_recalibrated"]) | |
t.test(merged$metaindex_circ[merged$meta == "structure_recalibrated"], merged$metaindex[merged$meta == "structure_recalibrated"]) | |
# boxplot(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"]) | |
# t.test(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"]) | |
# | |
# boxplot(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"]) | |
# t.test(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"]) | |
# | |
# boxplot(merged$metaindex11_circ, merged$metaindex11) | |
# t.test(merged$metaindex11_circ, merged$metaindex11) | |
merged$delta = merged$metaindex - merged$metaindex_circ | |
a = aov(merged$delta ~ merged$meta) | |
summary(a) | |
boxplot(merged$delta ~ merged$meta) | |
p <- ggplot(merged, aes(x=meta, y=delta)) + | |
geom_violin(trim=FALSE, fill="gray") + | |
labs(title="Benefit from transfering the weights and structure",x="Type of transfer learning", y = "Difference in metaindex (avarage accuracy across datasets)")+ | |
geom_boxplot(width=0.1) + | |
geom_hline(aes(yintercept=0), linetype="dashed") + | |
ylim(-0.3, 0.3) + | |
theme_classic() | |
p | |
hist(merged$delta) | |
which(merged$delta>0) | |
merged$benefit = merged$delta>0 | |
library(lme4) | |
library(lmerTest) | |
model = lmer(delta ~ meta + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + layer1_regularizer + optimizer + (1|merged$hyperparameter_id), | |
data=merged, | |
REML=TRUE) | |
anova(model) | |
confint(model) | |
summary(model) | |
library(tidyverse) | |
broom.mixed::tidy(model, conf.int = TRUE) | |
if (require("brms") && require("dotwhisker") && require("ggplot2")) { | |
L <- load(system.file("extdata", "brms_example.rda", package="broom.mixed")) | |
gg0 <- (broom.mixed::tidy(model) | |
## disambiguate | |
%>% mutate(term=ifelse(grepl("sd__(Int",term,fixed=TRUE), | |
paste(group,term,sep="."), | |
term)) | |
%>% dwplot | |
) | |
gg0 + geom_vline(xintercept=0,lty=2) | |
} | |
OmicSelector_correlation_plot(merged$metaindex, merged$delta, "Transfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F) | |
OmicSelector_correlation_plot(merged$metaindex_circ, merged$delta, "Untransfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F) | |
OmicSelector_correlation_plot(merged$metaindex_TCGA, merged$delta, "Initial TCGA network metaindex", "Difference in metaindex after training", "Comparison", yx = F) | |
# linearMod <- glm(benefit ~ as.factor(transfer_type) + layer1 + layer2 + layer3 + as.factor(activation_function_layer1) + as.factor(activation_function_layer2) | |
# + as.factor(activation_function_layer3) + as.numeric(as.factor(dropout_layer1)) + as.numeric(as.factor(layer1_regularizer)) + as.factor(optimizer), | |
# data=merged) | |
# summary(linearMod) | |
# library(caret) | |
# library(party) | |
# library(partykit) | |
# library(rattle) | |
# # Argumenty: paste0(colnames(transfer_performance), collapse = " + ") | |
# merged$benefit = as.factor(ifelse(as.factor(as.numeric(as.factor(merged$benefit))) == 1, "No", "Benefit")) | |
# train_control<- trainControl(method="repeatedcv", number=10, repeats = 10, classProbs=TRUE) | |
# model = caret::train(benefit ~ transfer_type + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + dropout_layer2 + dropout_layer3 + layer1_regularizer + layer2_regularizer + layer3_regularizer + optimizer + autoencoder + balanced + scaled, | |
# method = "rpart2", data = merged, trControl=train_control, tuneLength=20, maxdepth = 2) | |
# model | |
# fancyRpartPlot(model$finalModel) | |
# plot(model$finalModel) | |
# Finalne podejscie: top 100 sieci po transfer learningu wzgledem metaindexu, czy skorzystaly? | |
merged2 = merged[order(merged$metaindex, decreasing = T)] | |
table(merged$benefit[1:100]) | |
merged2 = merged2[1:100,] | |
boxplot(merged2$metaindex_circ, merged2$metaindex) | |
t.test(merged2$metaindex_circ, merged2$metaindex, paired = T) | |
hist(merged$delta) | |
shapiro.test(merged$delta) | |
hist(merged2$delta) | |
shapiro.test(merged2$delta) | |
t = paired(merged2$metaindex_circ, merged2$metaindex) | |
a = aov(merged2$delta ~ merged2$meta) | |
summary(a) | |
boxplot(merged2$delta ~ merged2$meta, ylab = "Difference in metaindex (mean accuracy on training, testing and validation datasets)", xlab = "Method of transfer") | |
TukeyHSD(a) | |
p <- ggplot(merged2, aes(x=meta, y=delta)) + | |
geom_violin(trim=FALSE, fill="gray") + | |
labs(title="Benefit from transfering the weights and structure",x="Type of transfer learning", y = "Difference in metaindex (avarage accuracy across datasets)")+ | |
geom_boxplot(width=0.1) + | |
geom_hline(aes(yintercept=0), linetype="dashed") + | |
ylim(-0.3, 0.3) + | |
theme_classic() | |
p | |
model = lmer(delta ~ -1 + meta + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + layer1_regularizer + optimizer + (1|hyperparameter_id), | |
data=merged2) | |
anova(model) | |
confint(model) | |
summary(model) | |
library(tidyverse) | |
broom.mixed::tidy(model, conf.int = TRUE) | |
if (require("brms") && require("dotwhisker") && require("ggplot2")) { | |
L <- load(system.file("extdata", "brms_example.rda", package="broom.mixed")) | |
gg0 <- (broom.mixed::tidy(model) | |
## disambiguate | |
%>% mutate(term=ifelse(grepl("sd__(Int",term,fixed=TRUE), | |
paste(group,term,sep="."), | |
term)) | |
%>% dwplot | |
) | |
gg0 + geom_vline(xintercept=0,lty=2) + | |
theme_classic() | |
} | |
OmicSelector_correlation_plot(merged2$metaindex, merged2$delta, "Transfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F) | |
OmicSelector_correlation_plot(merged2$metaindex_circ, merged2$delta, "Untransfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F) | |
OmicSelector_correlation_plot(merged2$metaindex_TCGA, merged2$delta, "Initial TCGA network metaindex", "Difference in metaindex after training", "Comparison", yx = F) | |
# Best network analysis | |
best_recalibrated = merged[which(merged$metaindex == max(merged$metaindex)),,] | |
best_circ = ourseq_models[which(ourseq_models$metaindex == max(ourseq_models$metaindex)),] | |
best_recalibrated | |
best_circ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# remotes::install_github("kstawiski/OmicSelector", ref = "dev") | |
setwd("~/snorlax/DOKTORAT/transfer_learning/pancreatic/transfer_1") | |
library(OmicSelector) | |
library(dplyr) | |
library("scatterplot3d") | |
OmicSelector_load_extension("deeplearning") | |
# Get TCGA results | |
tcga_models = data.table::fread("../tcga/pancreatic_tcga.csv") | |
ourseq_models = data.table::fread("../circulating_our/pancreatic_ourseq.csv") | |
tcga_models$metaindex = (tcga_models$training_Accuracy + tcga_models$test_Accuracy + tcga_models$valid_Accuracy) / 3 | |
hist(tcga_models$metaindex) | |
summary(tcga_models$metaindex) | |
tcga_models = arrange(tcga_models, desc(metaindex)) | |
top_tcga_models = tcga_models[1:100,] | |
plot(top_tcga_models$training_Accuracy, top_tcga_models$valid_Accuracy) | |
plot(top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy) | |
plot(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy) | |
scatterplot3d(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy, angle = 45, pch = 16, | |
grid=TRUE, box=FALSE, xlab = "Trainign Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy") | |
data.table::fwrite(top_tcga_models, "top_tcga_models.csv") | |
transfered_networks_structure = data.frame() | |
transfered_networks_layer1 = data.frame() | |
transfered_networks_layer2 = data.frame() | |
transfered_networks_layer3 = data.frame() | |
for(i in 1:100) { | |
tcga_model_modelfile = paste0("../tcga/models/pancreatic_tcga/pancreatic_tcga_", top_tcga_models$model_id[i], ".zip") | |
file.exists(tcga_model_modelfile) | |
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 0, freeze_to = 0, | |
train = data.table::fread("../circulating_our/mixed_train.csv"), | |
test = data.table::fread("../circulating_our/mixed_test.csv"), | |
valid = data.table::fread("../circulating_our/mixed_valid.csv")) | |
transfered_networks_structure = rbind.fill(transfered_networks_structure, temp2) }) | |
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 1, freeze_to = 1, | |
train = data.table::fread("../circulating_our/mixed_train.csv"), | |
test = data.table::fread("../circulating_our/mixed_test.csv"), | |
valid = data.table::fread("../circulating_our/mixed_valid.csv")) | |
transfered_networks_layer1 = rbind.fill(transfered_networks_layer1, temp2) }) | |
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 1, freeze_to = 2, | |
train = data.table::fread("../circulating_our/mixed_train.csv"), | |
test = data.table::fread("../circulating_our/mixed_test.csv"), | |
valid = data.table::fread("../circulating_our/mixed_valid.csv")) | |
transfered_networks_layer2 = rbind.fill(transfered_networks_layer2, temp2) }) | |
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 1, freeze_to = 3, | |
train = data.table::fread("../circulating_our/mixed_train.csv"), | |
test = data.table::fread("../circulating_our/mixed_test.csv"), | |
valid = data.table::fread("../circulating_our/mixed_valid.csv")) | |
transfered_networks_layer3 = rbind.fill(transfered_networks_layer3, temp2) }) | |
if(!dir.exists("transfer_models")) { dir.create("transfer_models") } | |
file.copy(list.files(".", "\\.zip$"), "transfer_models/") | |
file.remove(list.files(".", "\\.zip$")) | |
} | |
data.table::fwrite(transfered_networks_structure, "transfered_networks_structure.csv") | |
data.table::fwrite(transfered_networks_layer1, "transfered_networks_layer1.csv") | |
data.table::fwrite(transfered_networks_layer2, "transfered_networks_layer2.csv") | |
data.table::fwrite(transfered_networks_layer3, "transfered_networks_layer3.csv") | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
suppressMessages(library(plyr)) | |
suppressMessages(library(dplyr)) | |
suppressMessages(library(caret)) | |
suppressMessages(library(epiDisplay)) | |
suppressMessages(library(pROC)) | |
suppressMessages(library(ggplot2)) | |
suppressMessages(library(DMwR)) | |
suppressMessages(library(ROSE)) | |
suppressMessages(library(gridExtra)) | |
suppressMessages(library(gplots)) | |
suppressMessages(library(devtools)) | |
suppressMessages(library(stringr)) | |
suppressMessages(library(data.table)) | |
suppressMessages(library(tidyverse)) | |
library(OmicSelector) | |
suppressMessages(library(funModeling)) | |
message("OmicSelector: DeepLearning extension loaded.") | |
if(!dir.exists(paste0("models"))) { dir.create(paste0("models")) } | |
if(!dir.exists(paste0("temp"))) { dir.create(paste0("temp")) } | |
OmicSelector_keras_create_model <- function(i, hyperparameters, how_many_features = ncol(x_train_scale)) { | |
# tempmodel <- keras_model_sequential() %>% | |
# { if(hyperparameters[i,10]==T) { layer_dense(. , units = hyperparameters[i,1], kernel_regularizer = regularizer_l2(l = 0.001), | |
# activation = hyperparameters[i,4], input_shape = c(ncol(x_train_scale))) } else | |
# { layer_dense(. , units = hyperparameters[i,1], activation = hyperparameters[i,4], | |
# input_shape = c(ncol(x_train_scale))) } } %>% | |
# { if(hyperparameters[i,7]>0) { layer_dropout(. , rate = hyperparameters[i,7]) } else { . } } %>% | |
# { if(hyperparameters[i,2]>0) { | |
# if(hyperparameters[i,11]==T) { layer_dense(. , units = hyperparameters[i,2], activation = hyperparameters[i,5], | |
# kernel_regularizer = regularizer_l2(l = 0.001)) } else { | |
# layer_dense(units = hyperparameters[i,2], activation = hyperparameters[i,5]) } } } %>% | |
# { if(hyperparameters[i,8]>0) { layer_dropout(rate = hyperparameters[i,8]) } else { . } } %>% | |
# { if(hyperparameters[i,3]>0) { | |
# if(hyperparameters[i,12]==T) { layer_dense(units = hyperparameters[i,3], activation = hyperparameters[i,6], | |
# kernel_regularizer = regularizer_l2(l = 0.001)) } else | |
# {layer_dense(units = hyperparameters[i,3], activation = hyperparameters[i,6])} } else { . } } %>% | |
# { if(hyperparameters[i,9]>0) { layer_dropout(rate = hyperparameters[i,9]) } else { . } } %>% | |
# layer_dense(units = 1, activation = 'sigmoid') | |
library(keras) | |
tempmodel <- keras_model_sequential() | |
if(hyperparameters[i,10]==T) { layer_dense(tempmodel , units = hyperparameters[i,1], kernel_regularizer = regularizer_l1(l = 0.001), | |
activation = hyperparameters[i,4], input_shape = c(how_many_features)) } else | |
{ layer_dense(tempmodel , units = hyperparameters[i,1], activation = hyperparameters[i,4], | |
input_shape = c(how_many_features)) } | |
if(hyperparameters[i,7]>0) { layer_dropout(tempmodel , rate = hyperparameters[i,7]) } | |
if(hyperparameters[i,2]>0) { | |
if(hyperparameters[i,11]==T) { layer_dense(tempmodel , units = hyperparameters[i,2], activation = hyperparameters[i,5], | |
kernel_regularizer = regularizer_l1(l = 0.001)) } else | |
{layer_dense(tempmodel, units = hyperparameters[i,2], activation = hyperparameters[i,5]) } } | |
if(hyperparameters[i,2]>0 & hyperparameters[i,8]>0) { layer_dropout(tempmodel, rate = hyperparameters[i,8]) } | |
if(hyperparameters[i,3]>0) { | |
if(hyperparameters[i,12]==T) { layer_dense(tempmodel, units = hyperparameters[i,3], activation = hyperparameters[i,6], | |
kernel_regularizer = regularizer_l1(l = 0.001)) } else | |
{ layer_dense(tempmodel, units = hyperparameters[i,3], activation = hyperparameters[i,6])} } | |
if(hyperparameters[i,3]>0 & hyperparameters[i,9]>0) { layer_dropout(rate = hyperparameters[i,9]) } | |
layer_dense(tempmodel, units = 2, activation = 'softmax') | |
print(tempmodel) | |
dnn_class_model = keras::compile(tempmodel, optimizer = hyperparameters[i,13], | |
loss = 'binary_crossentropy', | |
metrics = 'accuracy') | |
} | |
# autoencoder ma softmax na deep feature | |
# jesli zmienna autoencoder >0 budujemy autoencoder 3-wartwowy, jesli <0 budujemy autoencoder 3-warstwowy z regularyzacja (sparse autoencoder) | |
OmicSelector_deep_learning = function(selected_miRNAs = ".", wd = getwd(), | |
SMOTE = F, keras_batch_size = 64, clean_temp_files = T, | |
save_threshold_trainacc = 0.85, save_threshold_testacc = 0.8, keras_epochae = 5000, | |
keras_epoch = 2000, keras_patience = 50, | |
hyperparameters = expand.grid(layer1 = seq(3,11, by = 2), layer2 = c(0,seq(3,11, by = 2)), layer3 = c(0,seq(3,11, by = 2)), | |
activation_function_layer1 = c("relu","sigmoid"), activation_function_layer2 = c("relu","sigmoid"), activation_function_layer3 = c("relu","sigmoid"), | |
dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0, 0.1), dropout_layer3 = c(0), | |
layer1_regularizer = c(T,F), layer2_regularizer = c(T,F), layer3_regularizer = c(T,F), | |
optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0,7,-7), balanced = SMOTE, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T,F), | |
stringsAsFactors = F), add_features_to_predictions = F, | |
keras_threads = ceiling(parallel::detectCores()/2), start = 1, end = nrow(hyperparameters), output_file = "deeplearning_results.csv", save_all_vars = F, automatic_weight = F) | |
{ | |
# library(OmicSelector) | |
# OmicSelector_load_extension("deeplearning") | |
codename = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(output_file)) | |
#options(warn=-1) | |
oldwd = getwd() | |
setwd = setwd(wd) | |
set.seed(1) | |
if(dir.exists("/OmicSelector")) { | |
if(!dir.exists("/OmicSelector/temp")) { dir.create("/OmicSelector/temp") } | |
temp_dir = "/OmicSelector/temp/" | |
} else { | |
temp_dir = tempdir() | |
} | |
if(!dir.exists("temp")) { dir.create("temp") } | |
if(!dir.exists("models")) { dir.create("models") } | |
options(bitmapType = 'cairo', device = 'png') | |
library(plyr) | |
library(dplyr) | |
library(keras) | |
library(foreach) | |
library(doParallel) | |
#library(doSNOW) | |
library(data.table) | |
fwrite(hyperparameters, paste0("hyperparameters_",output_file)) | |
dane = OmicSelector_load_datamix(wd = wd, replace_smote = F); train = dane[[1]]; test = dane[[2]]; valid = dane[[3]]; train_smoted = dane[[4]]; trainx = dane[[5]]; trainx_smoted = dane[[6]] | |
if (SMOTE == T) { train = train_smoted } | |
message("Checkpoint passed: load lib and data") | |
#cores=detectCores() | |
cat(paste0("\nTemp dir: ", temp_dir, "\n")) | |
cat("\nStarting preparing cluster..\n") | |
#cl <- makePSOCKcluster(keras_threads) #not to overload your computer | |
cl = makeCluster(keras_threads, outfile=paste0("temp/", ceiling(as.numeric(Sys.time())), "deeplearning_cluster.log")) | |
registerDoParallel(cl) | |
on.exit(stopCluster(cl)) | |
cat("\nCluster prepared..\n") | |
#message("Checkpoint passed: cluster prepared") | |
#args= names(mget(ls())) | |
#export = export[!export %in% args] | |
# tu musi isc iteracja | |
cat(paste0("\nStarting parallel loop.. There are: ", end-start+1, " hyperparameter sets to be checked.\n")) | |
final <- foreach(i=as.numeric(start):as.numeric(end), .combine=rbind, .verbose=F, .inorder=F, .export = ls() | |
#,.packages = loadedNamespaces() | |
) %dopar% { | |
Sys.setenv(TF_FORCE_GPU_ALLOW_GROWTH = 'true') | |
library(OmicSelector) | |
OmicSelector_load_extension("deeplearning") | |
if(!dir.exists(paste0(temp_dir,"/models"))) { dir.create(paste0(temp_dir,"/models")) } | |
if(!dir.exists(paste0(temp_dir,"/temp"))) { dir.create(paste0(temp_dir,"/temp")) } | |
start_time <- Sys.time() | |
library(keras) | |
library(ggplot2) | |
library(dplyr) | |
library(data.table) | |
set.seed(1) | |
# library(tensorflow) | |
# gpu = tf$test$is_gpu_available() | |
# if(gpu) { | |
# gpux <- tf$config$experimental$get_visible_devices('GPU')[[1]] | |
# tf$config$experimental$set_memory_growth(device = gpux, enable = TRUE) | |
# py_run_string("gpus = tf.config.experimental.list_physical_devices('GPU')") | |
# py_run_string("tf.config.experimental.set_virtual_device_configuration(gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)])") | |
# } | |
cat("\nStarting hyperparameters..\n") | |
print(hyperparameters[i,]) | |
message(paste0("OmicSelector: Starting training network no ", i, ".")) | |
message(paste0(hyperparameters[i,], collapse = ", ")) | |
options(bitmapType = 'cairo', device = 'png') | |
model_id = paste0(format(i, scientific = FALSE), "-", ceiling(as.numeric(Sys.time()))) | |
if(SMOTE == T) { model_id = paste0(format(i, scientific = FALSE), "-SMOTE-", ceiling(as.numeric(Sys.time()))) } | |
tempwyniki = data.frame(model_id=model_id) | |
tempwyniki[1, "model_id"] = model_id | |
#message("Checkpoint passed: chunk 1") | |
if(!dir.exists(paste0(temp_dir,"/models"))) { dir.create(paste0(temp_dir,"/models"))} | |
if(!dir.exists(paste0(temp_dir,"/models/keras",model_id))) { dir.create(paste0(temp_dir,"/models/keras",model_id))} | |
cat(paste0("\nTraining model: ",temp_dir,"/models/keras",model_id,"\n")) | |
#message("Checkpoint passed: chunk 2") | |
#pdf(paste0(temp_dir,"/models/keras",model_id,"/plots.pdf"), paper="a4") | |
con <- file(paste0(temp_dir,"/models/keras",model_id,"/training.log")) | |
sink(con, append=TRUE, split =TRUE) | |
sink(con, append=TRUE, type="message") | |
early_stop <- callback_early_stopping(monitor = "val_loss", mode="min", patience = keras_patience) | |
cp_callback <- callback_model_checkpoint( | |
filepath = paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"), | |
save_best_only = TRUE, period = 10, monitor = "val_loss", | |
verbose = 0 | |
) | |
ae_cp_callback <- callback_model_checkpoint( | |
filepath = paste0(temp_dir,"/models/keras",model_id,"/autoencoderweights.hdf5"), | |
save_best_only = TRUE, save_weights_only = T, period = 10, monitor = "val_loss", | |
verbose = 0 | |
) | |
x_train <- train %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>% | |
as.matrix() | |
y_train <- train %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0) | |
x_test <- test %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_test <- test %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0) | |
x_valid <- valid %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_valid <- valid %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0) | |
#message("Checkpoint passed: chunk 3") | |
if(automatic_weight) { | |
counter=funModeling::freq(to_categorical(y_train), plot=F) %>% select(var, frequency) | |
majority=max(counter$frequency) | |
counter$weight=ceil(majority/counter$frequency) | |
l_weights=setNames(as.list(counter$weight), counter$var) | |
message(l_weights) | |
} else { | |
l_weights = NULL | |
} | |
if(hyperparameters[i, 17] == T) { | |
x_train_scale = x_train %>% scale() | |
col_mean_train <- attr(x_train_scale, "scaled:center") | |
saveRDS(col_mean_train, paste0(temp_dir,"/models/keras",model_id,"/col_mean_train.RDS")) | |
col_sd_train <- attr(x_train_scale, "scaled:scale") | |
saveRDS(col_sd_train, paste0(temp_dir,"/models/keras",model_id,"/col_sd_train.RDS")) | |
x_test_scale <- x_test %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
x_valid_scale <- x_valid %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
} | |
else { | |
x_train_scale = x_train | |
x_test_scale <- x_test | |
x_valid_scale <- x_valid | |
} | |
input_layer <- layer_input(shape = c(ncol(x_train_scale))) | |
#message("Checkpoint passed: chunk 4") | |
#psych::describe(x_test_scale) | |
# czy autoenkoder? | |
if(hyperparameters[i,14] != 0) { | |
n1 <- hyperparameters[i,14] | |
n3 <- ncol(x_train_scale) | |
n2 = 7 | |
if(ceiling(n3/2) > 7) { n2 = ceiling(n3/2) } | |
if (hyperparameters[i,14]>0) { | |
encoder <- | |
input_layer %>% | |
layer_dense(units = n2, activation = hyperparameters[i,6]) %>% | |
layer_dense(units = n1, activation = "softmax") # dimensions of final encoding layer | |
decoder <- encoder %>% | |
layer_dense(units = n2, activation = hyperparameters[i,6]) %>% | |
layer_dense(units = n3, hyperparameters[i,6]) # dimension of original variable | |
} | |
else { | |
n1 = -n1 # korekta dla ujemnej wartosci w hiperparametrach | |
encoder <- | |
input_layer %>% | |
layer_dense(units = n2, activation = hyperparameters[i,6], kernel_regularizer = regularizer_l1(l = 0.01)) %>% | |
layer_dense(units = n1, activation = "softmax", kernel_regularizer = regularizer_l1(l = 0.01)) # dimensions of final encoding layer | |
decoder <- encoder %>% | |
layer_dense(units = n2, activation = hyperparameters[i,6], kernel_regularizer = regularizer_l1(l = 0.01)) %>% | |
layer_dense(units = n3, hyperparameters[i,6]) # dimension of original variable | |
} | |
ae_model <- keras_model(inputs = input_layer, outputs = decoder) | |
ae_model | |
ae_model %>% | |
keras::compile(loss = "mean_absolute_error", | |
optimizer = hyperparameters[i,13], | |
metrics = c("mean_squared_error")) | |
summary(ae_model) | |
#message("Checkpoint passed: chunk 5") | |
#ae_tempmodelfile = tempfile() | |
ae_history <- fit(ae_model, x = x_train_scale, | |
y = x_train_scale, | |
epochs = keras_epochae, batch_size = keras_batch_size, | |
shuffle = T, verbose = 0, | |
view_metrics = FALSE, | |
validation_data = list(x_test_scale, x_test_scale), | |
callbacks = list(ae_cp_callback, early_stop)) | |
#file.copy(ae_tempmodelfile, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"), overwrite = T) | |
#ae_history_df <- as.data.frame(ae_history) | |
#fwrite(ae_history_df, paste0(temp_dir,"/models/keras",model_id,"/autoencoder_history_df.csv.gz")) | |
saveRDS(ae_history, paste0(temp_dir,"/models/keras",model_id,"/ae_history.RDS")) | |
compare_cx <- data.frame( | |
train_loss = ae_history$metrics$loss, | |
test_loss = ae_history$metrics$val_loss | |
) %>% | |
tibble::rownames_to_column() %>% | |
mutate(rowname = as.integer(rowname)) %>% | |
tidyr::gather(key = "type", value = "value", -rowname) | |
plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
geom_line() + | |
xlab("epoch") + | |
ylab("loss") + | |
theme_bw() | |
#message("Checkpoint passed: chunk 6") | |
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_autoencoder.png"), grid.arrange(plot1, nrow =1, top = "Training of autoencoder")) | |
cat("\n- Reloading autoencoder to get weights...\n") | |
encoder_model <- keras_model(inputs = input_layer, outputs = encoder) | |
encoder_model %>% load_model_weights_hdf5(paste0(temp_dir,"/models/keras",model_id,"/autoencoderweights.hdf5"), | |
skip_mismatch = T, | |
by_name = T) | |
cat("\n- Autoencoder saving...\n") | |
save_model_hdf5(encoder_model, paste0(temp_dir,"/models/keras",model_id,"/autoencoder.hdf5")) | |
cat("\n- Creating deep features...\n") | |
#message("Checkpoint passed: chunk 7") | |
ae_x_train_scale <- encoder_model %>% | |
predict(x_train_scale) %>% | |
as.matrix() | |
fwrite(ae_x_train_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_train.csv")) | |
ae_x_test_scale <- encoder_model %>% | |
predict(x_test_scale) %>% | |
as.matrix() | |
fwrite(ae_x_test_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_test.csv")) | |
ae_x_valid_scale <- encoder_model %>% | |
predict(x_valid_scale) %>% | |
as.matrix() | |
fwrite(ae_x_valid_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_valid.csv")) | |
# # podmiana eby nie edytowa kodu | |
x_train_scale = as.matrix(ae_x_train_scale) | |
x_test_scale = as.matrix(ae_x_test_scale) | |
x_valid_scale = as.matrix(ae_x_valid_scale) | |
cat("\n- Training model based on deep features...\n") | |
dnn_class_model <- OmicSelector_keras_create_model(i, hyperparameters = hyperparameters, how_many_features = ncol(x_train_scale)) | |
#message("Checkpoint passed: chunk 8") | |
message("Starting training...") | |
#tempmodelfile = tempfile() | |
history <- fit(dnn_class_model, x = x_train_scale, | |
y = to_categorical(y_train), | |
epochs = keras_epoch, | |
validation_data = list(x_test_scale, to_categorical(y_test)), | |
callbacks = list(cp_callback, early_stop), | |
verbose = 0, | |
view_metrics = FALSE, | |
batch_size = keras_batch_size, shuffle = T, class_weight = l_weights) | |
message(history) | |
print(history) | |
#message("Checkpoint passed: chunk 8") | |
#plot(history, col="black") | |
#history_df <- as.data.frame(history) | |
# fwrite(history_df, paste0(temp_dir,"/models/keras",model_id,"/history_df.csv.gz")) | |
saveRDS(history, paste0(temp_dir,"/models/keras",model_id,"/history.RDS")) | |
#message("Checkpoint passed: chunk 9") | |
cat("\n- Saving history and plots...\n") | |
compare_cx <- data.frame( | |
train_loss = history$metrics$loss, | |
test_loss = history$metrics$val_loss | |
) %>% | |
tibble::rownames_to_column() %>% | |
mutate(rowname = as.integer(rowname)) %>% | |
tidyr::gather(key = "type", value = "value", -rowname) | |
plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
geom_line() + | |
xlab("Epoch") + | |
ylab("Loss") + | |
theme_bw() | |
#message("Checkpoint passed: chunk 10") | |
compare_cx <- data.frame( | |
train_accuracy = history$metrics$accuracy, | |
test_accuracy = history$metrics$val_accuracy | |
) %>% | |
tibble::rownames_to_column() %>% | |
mutate(rowname = as.integer(rowname)) %>% | |
tidyr::gather(key = "type", value = "value", -rowname) | |
plot2 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
geom_line() + | |
xlab("Epoch") + | |
ylab("Accuracy") + | |
theme_bw() | |
#message("Checkpoint passed: chunk 12") | |
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training.png"), grid.arrange(plot1, plot2, nrow =2, top = "Training of final neural network")) | |
# ewaluacja | |
cat("\n- Saving final model...\n") | |
dnn_class_model = load_model_hdf5(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")) | |
y_train_pred <- predict(object = dnn_class_model, x = x_train_scale) | |
y_test_pred <- predict(object = dnn_class_model, x = x_test_scale) | |
y_valid_pred <- predict(object = dnn_class_model, x = x_valid_scale) | |
#message("Checkpoint passed: chunk 13") | |
# wybranie odciecia | |
pred = data.frame(`Class` = train$Class, `Pred` = y_train_pred) | |
library(cutpointr) | |
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden) | |
summary(cutoff) | |
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/cutoff.png"), plot(cutoff)) | |
wybrany_cutoff = cutoff$optimal_cutpoint | |
#wyniki[i, "training_AUC"] = cutoff$AUC | |
tempwyniki[1, "training_AUC"] = cutoff$AUC | |
#wyniki[i, "cutoff"] = wybrany_cutoff | |
tempwyniki[1, "cutoff"] = wybrany_cutoff | |
cat(paste0("\n\n---- TRAINING AUC: ",cutoff$AUC," ----\n\n")) | |
cat(paste0("\n\n---- OPTIMAL CUTOFF: ",wybrany_cutoff," ----\n\n")) | |
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n")) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_train) | |
#message("Checkpoint passed: chunk 14") | |
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred) | |
tempwyniki[1, "training_AUC2"] = t1_roc$auc | |
tempwyniki[1, "training_AUC_lower95CI"] = as.character(ci(t1_roc))[1] | |
tempwyniki[1, "training_AUC_upper95CI"] = as.character(ci(t1_roc))[3] | |
saveRDS(t1_roc, paste0(temp_dir,"/models/keras",model_id,"/training_ROC.RDS")) | |
#message("Checkpoint passed: chunk 15") | |
tempwyniki[1, "training_Accuracy"] = cm_train$overall[1] | |
tempwyniki[1, "training_Sensitivity"] = cm_train$byClass[1] | |
tempwyniki[1, "training_Specificity"] = cm_train$byClass[2] | |
tempwyniki[1, "training_PPV"] = cm_train$byClass[3] | |
tempwyniki[1, "training_NPV"] = cm_train$byClass[4] | |
tempwyniki[1, "training_F1"] = cm_train$byClass[7] | |
saveRDS(cm_train, paste0(temp_dir,"/models/keras",model_id,"/cm_train.RDS")) | |
#message("Checkpoint passed: chunk 16") | |
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = test$Class, `Pred` = y_test_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_test) | |
tempwyniki[1, "test_Accuracy"] = cm_test$overall[1] | |
tempwyniki[1, "test_Sensitivity"] = cm_test$byClass[1] | |
tempwyniki[1, "test_Specificity"] = cm_test$byClass[2] | |
tempwyniki[1, "test_PPV"] = cm_test$byClass[3] | |
tempwyniki[1, "test_NPV"] = cm_test$byClass[4] | |
tempwyniki[1, "test_F1"] = cm_test$byClass[7] | |
saveRDS(cm_test, paste0(temp_dir,"/models/keras",model_id,"/cm_test.RDS")) | |
#message("Checkpoint passed: chunk 17") | |
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = valid$Class, `Pred` = y_valid_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_valid) | |
tempwyniki[1, "valid_Accuracy"] = cm_test$overall[1] | |
tempwyniki[1, "valid_Sensitivity"] = cm_test$byClass[1] | |
tempwyniki[1, "valid_Specificity"] = cm_test$byClass[2] | |
tempwyniki[1, "valid_PPV"] = cm_test$byClass[3] | |
tempwyniki[1, "valid_NPV"] = cm_test$byClass[4] | |
tempwyniki[1, "valid_F1"] = cm_test$byClass[7] | |
saveRDS(cm_valid, paste0(temp_dir,"/models/keras",model_id,"/cm_valid.RDS")) | |
#message("Checkpoint passed: chunk 18") | |
if(add_features_to_predictions) { | |
mix = rbind(train,test,valid) | |
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
mix$Podzial = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))) | |
mix$Pred = y_mixx_pred[,2] | |
mix$PredClass = ifelse(mix$Pred >= wybrany_cutoff, "Cancer", "Control") | |
fwrite(mix, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) } else { | |
mix = rbind(train,test,valid) | |
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
mix2 = data.frame( | |
`Podzial` = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))), | |
`Pred` = y_mixx_pred[,2], | |
`PredClass` = ifelse(y_mixx_pred[,2] >= wybrany_cutoff, "Cancer", "Control") | |
) | |
fwrite(mix2, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) | |
} | |
fwrite(cbind(hyperparameters[i,], tempwyniki), paste0(temp_dir,"/models/keras",model_id,"/wyniki.csv")) | |
#message("Checkpoint passed: chunk 19") | |
wagi = get_weights(dnn_class_model) | |
saveRDS(wagi, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.RDS")) | |
save_model_weights_hdf5(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.hdf5")) | |
saveRDS(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.RDS")) | |
#message("Checkpoint passed: chunk 20") | |
# czy jest sens zapisywac? | |
#sink() | |
#sink(type="message") | |
message(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
cat(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
if(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) { | |
# zapisywanie modelu do waciwego katalogu | |
if (save_all_vars) { save(list = ls(all=TRUE), file = paste0(temp_dir,"/models/keras",model_id,"/all.Rdata.gz"), compress = "gzip", compression_level = 9) } | |
if(!dir.exists(paste0("models/",codename,"/"))) { dir.create(paste0("models/",codename,"/")) } | |
if(dir.exists("/OmicSelector")) { | |
system(paste0("/bin/bash -c 'screen -dmS save_network zip -9 -r ", paste0("models/",codename,"/",codename, "_", model_id,".zip") ," ", paste0(temp_dir,"models/keras",model_id), "/'")) | |
} else { | |
zip(paste0("models/",codename,"/",codename, "_", model_id,".zip"),list.files(paste0(temp_dir,"models/keras",model_id), full.names = T, recursive = T, include.dirs = T)) } | |
# file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "_wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("temp/",codename,"_",model_id,"_deeplearningresults.csv")) | |
if (!dir.exists(paste0("temp/",codename,"/"))) { dir.create(paste0("temp/",codename,"/")) } | |
file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "_wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("temp/",codename,"/",model_id,"_deeplearningresults.csv")) | |
#message("Checkpoint passed: chunk 21") | |
#dev.off() | |
} | |
} else { | |
x_train <- train %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_train <- train %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0) | |
#message("Checkpoint passed: chunk 22") | |
x_test <- test %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_test <- test %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0) | |
x_valid <- valid %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_valid <- valid %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0) | |
#message("Checkpoint passed: chunk 23") | |
if(hyperparameters[i, 17] == T) { | |
x_train_scale = x_train %>% scale() | |
col_mean_train <- attr(x_train_scale, "scaled:center") | |
col_sd_train <- attr(x_train_scale, "scaled:scale") | |
x_test_scale <- x_test %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
x_valid_scale <- x_valid %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
} | |
else { | |
x_train_scale = x_train | |
x_test_scale <- x_test | |
x_valid_scale <- x_valid | |
} | |
#message("Checkpoint passed: chunk 24") | |
dnn_class_model <- OmicSelector_keras_create_model(i, hyperparameters = hyperparameters, how_many_features = ncol(x_train_scale)) | |
message("Starting training...") | |
history <- fit(dnn_class_model, x = x_train_scale, | |
y = to_categorical(y_train), | |
epochs = keras_epoch, | |
validation_data = list(x_test_scale, to_categorical(y_test)), | |
callbacks = list( | |
cp_callback, | |
#callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.1), | |
#callback_model_checkpoint(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")), | |
early_stop), | |
verbose = 0, | |
view_metrics = FALSE, | |
batch_size = keras_batch_size, shuffle = T, class_weight = l_weights) | |
print(history) | |
#message("Checkpoint passed: chunk 25") | |
message(history) | |
#plot(history, col="black") | |
saveRDS(history, paste0(temp_dir,"/models/keras",model_id,"/history.RDS")) | |
#message("Checkpoint passed: chunk 26") | |
#fwrite(history_df, paste0(temp_dir,"/models/keras",model_id,"/history_df.csv.gz")) | |
# pdf(paste0(temp_dir,"/models/keras",model_id,"/plots.pdf")) | |
compare_cx <- data.frame( | |
train_loss = history$metrics$loss, | |
test_loss = history$metrics$val_loss | |
) %>% | |
tibble::rownames_to_column() %>% | |
mutate(rowname = as.integer(rowname)) %>% | |
tidyr::gather(key = "type", value = "value", -rowname) | |
plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
geom_line() + | |
xlab("epoch") + | |
ylab("loss") + | |
theme_bw() | |
#message("Checkpoint passed: chunk 27") | |
compare_cx <- data.frame( | |
train_accuracy = history$metrics$accuracy, | |
test_accuracy = history$metrics$val_accuracy | |
) %>% | |
tibble::rownames_to_column() %>% | |
mutate(rowname = as.integer(rowname)) %>% | |
tidyr::gather(key = "type", value = "value", -rowname) | |
plot2 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) + | |
geom_line() + | |
xlab("epoch") + | |
ylab("loss") + | |
theme_bw() | |
#message("Checkpoint passed: chunk 28") | |
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training.png"), grid.arrange(plot1, plot2, nrow =2, top = "Training of final neural network")) | |
# ewaluacja | |
dnn_class_model = load_model_hdf5(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")) | |
y_train_pred <- predict(object = dnn_class_model, x = x_train_scale) | |
y_test_pred <- predict(object = dnn_class_model, x = x_test_scale) | |
y_valid_pred <- predict(object = dnn_class_model, x = x_valid_scale) | |
#message("Checkpoint passed: chunk 29") | |
# wybranie odciecia | |
pred = data.frame(`Class` = train$Class, `Pred` = y_train_pred) | |
library(cutpointr) | |
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden) | |
print(summary(cutoff)) | |
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/cutoff.png"), plot(cutoff)) | |
wybrany_cutoff = cutoff$optimal_cutpoint | |
#wyniki[i, "training_AUC"] = cutoff$AUC | |
tempwyniki[1, "training_AUC"] = cutoff$AUC | |
#wyniki[i, "cutoff"] = wybrany_cutoff | |
tempwyniki[1, "cutoff"] = wybrany_cutoff | |
#message("Checkpoint passed: chunk 30") | |
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n")) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_train) | |
#message("Checkpoint passed: chunk 31") | |
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred) | |
tempwyniki[1, "training_AUC2"] = t1_roc$auc | |
tempwyniki[1, "training_AUC_lower95CI"] = as.character(ci(t1_roc))[1] | |
tempwyniki[1, "training_AUC_upper95CI"] = as.character(ci(t1_roc))[3] | |
saveRDS(t1_roc, paste0(temp_dir,"/models/keras",model_id,"/training_ROC.RDS")) | |
#ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve")) | |
#message("Checkpoint passed: chunk 32") | |
tempwyniki[1, "training_Accuracy"] = cm_train$overall[1] | |
tempwyniki[1, "training_Sensitivity"] = cm_train$byClass[1] | |
tempwyniki[1, "training_Specificity"] = cm_train$byClass[2] | |
tempwyniki[1, "training_PPV"] = cm_train$byClass[3] | |
tempwyniki[1, "training_NPV"] = cm_train$byClass[4] | |
tempwyniki[1, "training_F1"] = cm_train$byClass[7] | |
saveRDS(cm_train, paste0(temp_dir,"/models/keras",model_id,"/cm_train.RDS")) | |
#message("Checkpoint passed: chunk 33") | |
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = test$Class, `Pred` = y_test_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_test) | |
tempwyniki[1, "test_Accuracy"] = cm_test$overall[1] | |
tempwyniki[1, "test_Sensitivity"] = cm_test$byClass[1] | |
tempwyniki[1, "test_Specificity"] = cm_test$byClass[2] | |
tempwyniki[1, "test_PPV"] = cm_test$byClass[3] | |
tempwyniki[1, "test_NPV"] = cm_test$byClass[4] | |
tempwyniki[1, "test_F1"] = cm_test$byClass[7] | |
saveRDS(cm_test, paste0(temp_dir,"/models/keras",model_id,"/cm_test.RDS")) | |
#message("Checkpoint passed: chunk 34") | |
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = valid$Class, `Pred` = y_valid_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_valid) | |
tempwyniki[1, "valid_Accuracy"] = cm_valid$overall[1] | |
tempwyniki[1, "valid_Sensitivity"] = cm_valid$byClass[1] | |
tempwyniki[1, "valid_Specificity"] = cm_valid$byClass[2] | |
tempwyniki[1, "valid_PPV"] = cm_valid$byClass[3] | |
tempwyniki[1, "valid_NPV"] = cm_valid$byClass[4] | |
tempwyniki[1, "valid_F1"] = cm_valid$byClass[7] | |
saveRDS(cm_valid, paste0(temp_dir,"/models/keras",model_id,"/cm_valid.RDS")) | |
#message("Checkpoint passed: chunk 35") | |
if(add_features_to_predictions) { | |
mix = rbind(train,test,valid) | |
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
mix$Podzial = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))) | |
mix$Pred = y_mixx_pred[,2] | |
mix$PredClass = ifelse(mix$Pred >= wybrany_cutoff, "Cancer", "Control") | |
fwrite(mix, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) } else { | |
mix = rbind(train,test,valid) | |
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale) | |
y_mixx_pred <- predict(object = dnn_class_model, x = mixx) | |
mix2 = data.frame( | |
`Podzial` = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))), | |
`Pred` = y_mixx_pred[,2], | |
`PredClass` = ifelse(y_mixx_pred[,2] >= wybrany_cutoff, "Cancer", "Control") | |
) | |
fwrite(mix2, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) | |
} | |
fwrite(cbind(hyperparameters[i,], tempwyniki), paste0(temp_dir,"/models/keras",model_id,"/wyniki.csv")) | |
wagi = get_weights(dnn_class_model) | |
saveRDS(wagi, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.RDS")) | |
save_model_weights_hdf5(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.hdf5")) | |
saveRDS(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.RDS")) | |
#message("Checkpoint passed: chunk 36") | |
# czy jest sens zapisywac? | |
cat(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
if(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) { | |
# zapisywanie modelu do waciwego katalogu | |
#message("Checkpoint passed: chunk 37e") | |
if (save_all_vars) { save(list = ls(all=TRUE), file = paste0(temp_dir,"/models/keras",model_id,"/all.Rdata.gz"), compress = "gzip", compression_level = 9) } | |
#message("Checkpoint passed: chunk 37d") | |
if(!dir.exists(paste0("models/",codename,"/"))) { dir.create(paste0("models/",codename,"/")) } | |
#message("Checkpoint passed: chunk 37c") | |
if(dir.exists("/OmicSelector")) { | |
system(paste0("/bin/bash -c 'screen -dmS save_network zip -9 -r ", paste0("models/",codename,"/",codename, "_", model_id,".zip") ," ", paste0(temp_dir,"models/keras",model_id), "/'")) | |
} else { | |
zip(paste0("models/",codename,"/",codename, "_", model_id,".zip"),list.files(paste0(temp_dir,"models/keras",model_id), full.names = T, recursive = T, include.dirs = T)) } | |
#message("Checkpoint passed: chunk 37b") | |
if (!dir.exists(paste0("temp/",codename,"/"))) { dir.create(paste0("temp/",codename,"/")) } | |
#message("Checkpoint passed: chunk 37a") | |
file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "_wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("models/",codename,"/",model_id,"_deeplearningresults.csv")) | |
#message("Checkpoint passed: chunk 37") | |
} } | |
sink() | |
sink(type="message") | |
message(paste0("OmicSelector: Finished training network id: ",model_id, " : training_acc: ", tempwyniki[1, "training_Accuracy"], ", testing_acc: ", tempwyniki[1, "test_Accuracy"], " ==> worth_saving: ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)) | |
#dev.off() | |
tempwyniki2 = cbind(hyperparameters[i,],tempwyniki) | |
tempwyniki2[1,"name"] = paste0(codename,"_", model_id) | |
tempwyniki2[1,"worth_saving"] = as.character(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) | |
end_time <- Sys.time() | |
tempwyniki2[1,"training_time"] = as.character(end_time - start_time) | |
tempwyniki2 | |
} | |
saveRDS(final, paste0(output_file,".RDS")) | |
cat("\nAll done!! Ending..\n") | |
if (file.exists(output_file)) { | |
tempfi = fread(output_file) | |
final = rbind(tempfi, final) } | |
fwrite(final, output_file) | |
setwd(oldwd) | |
#options(warn=0) | |
# sprztanie | |
if(clean_temp_files) { | |
keras::k_clear_session() | |
unlink(paste0(normalizePath(temp_dir), "/", dir(temp_dir)), recursive = TRUE) | |
} | |
} | |
#' OmicSelector_transfer_learning_neural_network() | |
#' | |
#' This function allows to perform simple transfer learning of the network created in the process above by freezing the weights on particular layers. | |
#' | |
#' @param selected_miRNAs Which features should be selected? | |
#' @param new_scaling If to generate new scaling for scaled models (it is recommended for recalibration procedures). If set to false you should set old_train_csv_to_restore_scaling - used to recreate initial scaling. | |
#' @param model_path Point zip file to be used as ininital model for transfer learning. | |
#' @param save_scaling Whould you like to save scaling parameters for further reference? Default: yes | |
#' @param freeze_from Which layers to freeze? Set from in https://keras.rstudio.com/reference/freeze_weights.html. If set to 0 or below - the algorithm will omit freezing. | |
#' @param freeze_to Which layers to freeze? Set to in https://keras.rstudio.com/reference/freeze_weights.html | |
#' @param train Provide train set. | |
#' @param test Provide test set. | |
#' @param valid Provide validation set. | |
#' | |
#' @return Results of uncalibrated and recalibrated models. | |
#' | |
#' @export | |
OmicSelector_transfer_learning_neural_network = function(selected_miRNAs = ".", new_scaling = TRUE, model_path = "tcga_models/pancreatic_tcga_165592-1601307872.zip", save_scaling = TRUE, | |
old_train_csv_to_restore_scaling = "../tcga/mixed_train.csv" # used if save_scaling is F and no scaling properties were saved in the model; this regenerates scaling based on previous training set. | |
, freeze_from = 1 | |
, freeze_to = 2, | |
train = fread("circ_data/mixed_train.csv"), | |
test = fread("circ_data/mixed_test.csv"), | |
valid = fread("circ_data/mixed_valid.csv")) { | |
tempwyniki = data.frame() | |
library(keras) | |
# library(OmicSelector) | |
# OmicSelector_load_extension("deeplearning") | |
library(data.table) | |
x_train <- train %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>% | |
as.matrix() | |
y_train <- train %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0) | |
x_test <- test %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_test <- test %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0) | |
x_valid <- valid %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>% | |
as.matrix() | |
y_valid <- valid %>% | |
dplyr::select("Class") %>% | |
as.matrix() | |
y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0) | |
##### | |
# INIT MODEL | |
model_path_in_zip = dplyr::filter(unzip(model_path, list = T), grepl("finalmodel.hdf5",Name))[1,"Name"] | |
unzip(model_path, model_path_in_zip, exdir = tempdir()) | |
model_path_unzipped = paste0(tempdir(), "/", model_path_in_zip) | |
init_model = keras::load_model_hdf5(model_path_unzipped) | |
init_model | |
wagi_wyjsciowe = get_weights(init_model) | |
wyniki_path_in_zip = dplyr::filter(unzip(model_path, list = T), grepl("wyniki.csv",Name))[1,"Name"] | |
unzip(model_path, wyniki_path_in_zip, exdir = tempdir()) | |
wyniki_path_unzipped = paste0(tempdir(), "/", wyniki_path_in_zip) | |
pre_conf = data.table::fread(wyniki_path_unzipped) | |
# | |
# If scaled | |
if(as.logical(pre_conf[1,"scaled"])) { | |
if(new_scaling) { | |
x_train_scale = x_train %>% scale() | |
col_mean_train <- attr(x_train_scale, "scaled:center") | |
col_sd_train <- attr(x_train_scale, "scaled:scale") | |
saveRDS(col_mean_train, "transfer_col_mean_train.RDS") | |
saveRDS(col_sd_train, "transfer_col_sd_train.RDS") | |
x_test_scale <- x_test %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
x_valid_scale <- x_valid %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
} else { | |
tcga_train = fread(old_train_csv_to_restore_scaling) | |
tcga_train <- tcga_train %>% | |
{ if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>% | |
as.matrix() %>% scale() | |
col_mean_train <- attr(tcga_train, "scaled:center") | |
col_sd_train <- attr(tcga_train, "scaled:scale") | |
saveRDS(col_mean_train, "transfer_col_mean_train.RDS") | |
saveRDS(col_sd_train, "transfer_col_sd_train.RDS") | |
x_train_scale <- x_train %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
x_test_scale <- x_test %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
x_valid_scale <- x_valid %>% | |
scale(center = col_mean_train, | |
scale = col_sd_train) | |
} | |
} else { | |
x_train_scale <- x_train | |
x_test_scale <- x_test | |
x_valid_scale <- x_valid | |
} | |
preds = predict(init_model, x_train_scale)[,2] | |
library(pROC) | |
# wybranie odciecia | |
pred = data.frame(`Class` = as.factor(ifelse(y_train==1,"Cancer","Control")), `Pred` = predict(init_model, x_train_scale)) | |
library(cutpointr) | |
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden) | |
print(summary(cutoff)) | |
ggplot2::ggsave(file = paste0("cutoff.png"), plot(cutoff)) | |
wybrany_cutoff = cutoff$optimal_cutpoint | |
y_train_pred <- predict(object = init_model, x = x_train_scale) | |
y_test_pred <- predict(object = init_model, x = x_test_scale) | |
y_valid_pred <- predict(object = init_model, x = x_valid_scale) | |
#message("Checkpoint passed: chunk 29") | |
#wyniki[i, "training_AUC"] = cutoff$AUC | |
tempwyniki[1, "new_training_AUC"] = cutoff$AUC | |
#wyniki[i, "cutoff"] = wybrany_cutoff | |
tempwyniki[1, "new_cutoff"] = wybrany_cutoff | |
#message("Checkpoint passed: chunk 30") | |
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n")) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_train) | |
#message("Checkpoint passed: chunk 31") | |
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred) | |
tempwyniki[1, "new_training_AUC2"] = t1_roc$auc | |
tempwyniki[1, "new_training_AUC_lower95CI"] = as.character(ci(t1_roc))[1] | |
tempwyniki[1, "new_training_AUC_upper95CI"] = as.character(ci(t1_roc))[3] | |
saveRDS(t1_roc, paste0("init_training_ROC.RDS")) | |
#ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve")) | |
#message("Checkpoint passed: chunk 32") | |
tempwyniki[1, "new_training_Accuracy"] = cm_train$overall[1] | |
tempwyniki[1, "new_training_Sensitivity"] = cm_train$byClass[1] | |
tempwyniki[1, "new_training_Specificity"] = cm_train$byClass[2] | |
tempwyniki[1, "new_training_PPV"] = cm_train$byClass[3] | |
tempwyniki[1, "new_training_NPV"] = cm_train$byClass[4] | |
tempwyniki[1, "new_training_F1"] = cm_train$byClass[7] | |
saveRDS(cm_train, paste0("init_cm_train.RDS")) | |
#message("Checkpoint passed: chunk 33") | |
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = as.factor(test$Class), `Pred` = y_test_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_test) | |
tempwyniki[1, "new_test_Accuracy"] = cm_test$overall[1] | |
tempwyniki[1, "new_test_Sensitivity"] = cm_test$byClass[1] | |
tempwyniki[1, "new_test_Specificity"] = cm_test$byClass[2] | |
tempwyniki[1, "new_test_PPV"] = cm_test$byClass[3] | |
tempwyniki[1, "new_test_NPV"] = cm_test$byClass[4] | |
tempwyniki[1, "new_test_F1"] = cm_test$byClass[7] | |
saveRDS(cm_test, paste0("init_cm_test.RDS")) | |
#message("Checkpoint passed: chunk 34") | |
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = as.factor(valid$Class), `Pred` = y_valid_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_valid) | |
tempwyniki[1, "new_valid_Accuracy"] = cm_valid$overall[1] | |
tempwyniki[1, "new_valid_Sensitivity"] = cm_valid$byClass[1] | |
tempwyniki[1, "new_valid_Specificity"] = cm_valid$byClass[2] | |
tempwyniki[1, "new_valid_PPV"] = cm_valid$byClass[3] | |
tempwyniki[1, "new_valid_NPV"] = cm_valid$byClass[4] | |
tempwyniki[1, "new_valid_F1"] = cm_valid$byClass[7] | |
saveRDS(cm_valid, paste0("init_cm_valid.RDS")) | |
#message("Checkpoint passed: chunk 35") | |
##### | |
# RECALIBRATION | |
scenario_i = 2 | |
scenario = "trans1" | |
trans_model = clone_model(init_model) | |
early_stop <- callback_early_stopping(monitor = "val_loss", mode="min", patience = 200) | |
cp_callback <- callback_model_checkpoint( | |
filepath = paste0(scenario,"_model.hdf5"), | |
save_best_only = TRUE, period = 10, monitor = "val_loss", | |
verbose = 0 | |
) | |
keras_batch_size = 64 | |
# Opcje transferu: | |
compile(trans_model, loss = 'binary_crossentropy', | |
metrics = 'accuracy', optimizer = pre_conf$optimizer) | |
set_weights(trans_model, get_weights(init_model)) | |
if(freeze_from>0) { | |
freeze_weights(trans_model, from = freeze_from, to = freeze_to) | |
compile(trans_model, loss = 'binary_crossentropy', | |
metrics = 'accuracy', optimizer = pre_conf$optimizer) } | |
# Rekalibracja: | |
history <- fit(trans_model, x = x_train_scale, | |
y = to_categorical(y_train), | |
epochs = 5000, | |
validation_data = list(x_test_scale, to_categorical(y_test)), | |
callbacks = list( | |
cp_callback, | |
#callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.1), | |
#callback_model_checkpoint(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")), | |
early_stop), | |
verbose = 0, | |
view_metrics = FALSE, | |
batch_size = keras_batch_size, shuffle = T) | |
history | |
get_weights(init_model) | |
get_weights(trans_model) | |
# Ocena po rekalibracji: | |
preds = predict(trans_model, x_train_scale)[,2] | |
library(pROC) | |
# wybranie odciecia | |
pred = data.frame(`Class` = as.factor(ifelse(y_train==1,"Cancer","Control")), `Pred` = predict(trans_model, x_train_scale)) | |
library(cutpointr) | |
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden) | |
print(summary(cutoff)) | |
ggplot2::ggsave(file = paste0("trans_cutoff.png"), plot(cutoff)) | |
wybrany_cutoff = cutoff$optimal_cutpoint | |
y_train_pred <- predict(object = trans_model, x = x_train_scale) | |
y_test_pred <- predict(object = trans_model, x = x_test_scale) | |
y_valid_pred <- predict(object = trans_model, x = x_valid_scale) | |
#message("Checkpoint passed: chunk 29") | |
#wyniki[i, "training_AUC"] = cutoff$AUC | |
tempwyniki[scenario_i, "new_training_AUC"] = cutoff$AUC | |
#wyniki[i, "cutoff"] = wybrany_cutoff | |
tempwyniki[scenario_i, "new_cutoff"] = wybrany_cutoff | |
#message("Checkpoint passed: chunk 30") | |
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n")) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_train) | |
#message("Checkpoint passed: chunk 31") | |
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred) | |
tempwyniki[scenario_i, "new_training_AUC2"] = t1_roc$auc | |
tempwyniki[scenario_i, "new_training_AUC_lower95CI"] = as.character(ci(t1_roc))[1] | |
tempwyniki[scenario_i, "new_training_AUC_upper95CI"] = as.character(ci(t1_roc))[3] | |
saveRDS(t1_roc, paste0("new_trans_training_ROC.RDS")) | |
#ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve")) | |
#message("Checkpoint passed: chunk 32") | |
tempwyniki[scenario_i, "new_training_Accuracy"] = cm_train$overall[1] | |
tempwyniki[scenario_i, "new_training_Sensitivity"] = cm_train$byClass[1] | |
tempwyniki[scenario_i, "new_training_Specificity"] = cm_train$byClass[2] | |
tempwyniki[scenario_i, "new_training_PPV"] = cm_train$byClass[3] | |
tempwyniki[scenario_i, "new_training_NPV"] = cm_train$byClass[4] | |
tempwyniki[scenario_i, "new_training_F1"] = cm_train$byClass[7] | |
saveRDS(cm_train, paste0("trans_cm_train.RDS")) | |
#message("Checkpoint passed: chunk 33") | |
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = as.factor(test$Class), `Pred` = y_test_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_test) | |
tempwyniki[scenario_i, "new_test_Accuracy"] = cm_test$overall[1] | |
tempwyniki[scenario_i, "new_test_Sensitivity"] = cm_test$byClass[1] | |
tempwyniki[scenario_i, "new_test_Specificity"] = cm_test$byClass[2] | |
tempwyniki[scenario_i, "new_test_PPV"] = cm_test$byClass[3] | |
tempwyniki[scenario_i, "new_test_NPV"] = cm_test$byClass[4] | |
tempwyniki[scenario_i, "new_test_F1"] = cm_test$byClass[7] | |
saveRDS(cm_test, paste0("trans_cm_test.RDS")) | |
#message("Checkpoint passed: chunk 34") | |
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n")) | |
pred = data.frame(`Class` = as.factor(valid$Class), `Pred` = y_valid_pred) | |
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control") | |
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer")) | |
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer") | |
print(cm_valid) | |
tempwyniki[scenario_i, "new_valid_Accuracy"] = cm_valid$overall[1] | |
tempwyniki[scenario_i, "new_valid_Sensitivity"] = cm_valid$byClass[1] | |
tempwyniki[scenario_i, "new_valid_Specificity"] = cm_valid$byClass[2] | |
tempwyniki[scenario_i, "new_valid_PPV"] = cm_valid$byClass[3] | |
tempwyniki[scenario_i, "new_valid_NPV"] = cm_valid$byClass[4] | |
tempwyniki[scenario_i, "new_valid_F1"] = cm_valid$byClass[7] | |
saveRDS(cm_valid, paste0("trans_cm_valid.RDS")) | |
#message("Checkpoint passed: chunk 35") | |
resu = cbind(pre_conf, tempwyniki) | |
resu$type = c("crude","recalibrated") | |
resu$based_on = basename(model_path) | |
resu$based_on_path = model_path | |
new_name = make.names(paste0("transfer-",as.numeric(Sys.time()))) | |
resu$new_model_name = c("",new_name) | |
fwrite(resu, "resu.csv") | |
zip(zipfile = paste0(new_name,".zip"), files = c(list.files(".", pattern = ".hdf5"), list.files(".", pattern = ".csv"), list.files(".", pattern = ".RDS"), list.files(".", pattern = ".png"))) | |
unlink(c(list.files(".", pattern = ".hdf5"), list.files(".", pattern = ".csv"), list.files(".", pattern = ".RDS"), list.files(".", pattern = ".png"))) | |
resu | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
until Rscript 02_deeplearning.R; do | |
echo "Wywalil sie... powtarzam.." | |
sleep 10 | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment