OmicSelector DeepLearning snippets
library(OmicSelector); set.seed(1);
if(!dir.exists("/OmicSelector/temp")) { dir.create("/OmicSelector/temp") }
OmicSelector_load_extension("deeplearning")
library(data.table)
#use_session_with_seed(1)
#selected_miRNAs = c("hsa.miR.192.5p",
# "hsa.let.7g.5p",
# "hsa.let.7a.5p",
# "hsa.let.7d.5p",
# "hsa.miR.194.5p",
# "hsa.miR.98.5p",
# "hsa.let.7f.5p",
# "hsa.miR.122.5p",
# "hsa.miR.340.5p",
# "hsa.miR.26b.5p"
# ,"hsa.miR.17.5p",
# "hsa.miR.199a.3p",
# "hsa.miR.28.3p",
# "hsa.miR.92a.3p"
#)
balanced = F
nazwa_konfiguracji = "pancreatic_ourseq.csv"
t = data.table::fread("mixed_train.csv")
studyMirs = make.names(c('hsa-miR-192-5p', 'hsa-let-7g-5p', 'hsa-let-7a-5p', 'hsa-let-7d-5p', 'hsa-miR-194-5p', 'hsa-miR-98-5p', 'hsa-let-7f-5p', 'hsa-miR-122-5p', 'hsa-miR-340-5p', 'hsa-miR-26b-5p'))
norm3_1 = make.names(c('hsa-miR-17-5p', 'hsa-miR-92a-3p', 'hsa-miR-199a-3p'))
norm2 = make.names(c('hsa-miR-28-3p', 'hsa-miR-92a-3p'))
normQ = make.names('hsa-miR-23a-3p')
all_norm = make.names(c('hsa-miR-17-5p', 'hsa-miR-92a-3p', 'hsa-miR-199a-3p', 'hsa-miR-23a-3p', 'hsa-miR-28-3p'))
# study
selected_miRNAs = studyMirs
# the autoencoder has softmax on the deep-feature layer
hyperparameters_part1 = expand.grid(layer1 = seq(2,10, by = 1), layer2 = c(0), layer3 = c(0),
activation_function_layer1 = c("relu","sigmoid","selu"), activation_function_layer2 = c("relu"), activation_function_layer3 = c("relu"),
dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0), dropout_layer3 = c(0),
layer1_regularizer = c(T,F), layer2_regularizer = c(F), layer3_regularizer = c(F),
optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0), balanced = balanced, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T),
stringsAsFactors = F)
hyperparameters_part2 = expand.grid(layer1 = seq(3,11, by = 2), layer2 = c(seq(3,11, by = 2)), layer3 = c(seq(0,11, by = 2)),
activation_function_layer1 = c("relu","sigmoid","selu"), activation_function_layer2 = c("relu","sigmoid","selu"), activation_function_layer3 = c("relu","sigmoid","selu"),
dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0), dropout_layer3 = c(0),
layer1_regularizer = c(T,F), layer2_regularizer = c(F), layer3_regularizer = c(F),
optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0), balanced = balanced, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T,F),
stringsAsFactors = F)
hyperparameters = rbind(hyperparameters_part1, hyperparameters_part2)
head(hyperparameters)
ile = nrow(hyperparameters)
ile_w_batchu = 250
cat(paste0("\nHow many to check: ", ile))
if(!file.exists(nazwa_konfiguracji)) {
batch_start = 1
} else {
tempres = data.table::fread(nazwa_konfiguracji)
last = as.numeric(str_split(tempres$model_id, "-")[[nrow(tempres)]][1])
if(last >= ile) { stop(paste0(last, " - Task already finished.")) }
batch_start = as.numeric(last)+1
}
ile_batchy = ceiling(nrow(hyperparameters)/ile_w_batchu - batch_start/ile_w_batchu)
cat(paste0("\nBatch start: ", batch_start))
cat(paste0("\nHow many in batch: ", ile_w_batchu))
for (i in 1:ile_batchy) {
batch_end = batch_start + (ile_w_batchu-1)
if (batch_end > ile) { batch_end = ile }
cat(paste0("\n\nProcessing batch no ", i , " of ", ile_batchy, " (", batch_start, "-", batch_end, ")"))
OmicSelector_deep_learning(selected_miRNAs = selected_miRNAs, wd = getwd(), save_threshold_trainacc = 0.75, save_threshold_testacc = 0.7, hyperparameters = hyperparameters,
SMOTE = balanced, start = batch_start, end = batch_end, output_file = nazwa_konfiguracji, keras_threads = 30,
keras_epoch = 2000, keras_patience = 100, automatic_weight = F)
batch_start = batch_end + 1
}
# remotes::install_github("kstawiski/OmicSelector", ref = "dev")
setwd("~/snorlax/DOKTORAT/transfer_learning/pancreatic/transfer_1")
library(OmicSelector)
library(dplyr)
library(ggplot2)
library("scatterplot3d")
OmicSelector_load_extension("deeplearning")
# Get TCGA results
tcga_models = data.table::fread("../tcga/pancreatic_tcga.csv")
ourseq_models = data.table::fread("../circulating_our/pancreatic_ourseq.csv")
tcga_models$task = "TCGA"
ourseq_models$task = "Circulating miRNA-seq"
tcga_models$no_layers = 1
tcga_models$no_layers[tcga_models$layer2 != 0] = 2
tcga_models$no_layers[tcga_models$layer3 != 0] = 3
ourseq_models$no_layers = 1
ourseq_models$no_layers[ourseq_models$layer2 != 0] = 2
ourseq_models$no_layers[ourseq_models$layer3 != 0] = 3
tcga_models$hyperparameter_id = 1:nrow(tcga_models)
ourseq_models$hyperparameter_id = 1:nrow(ourseq_models)
tcga_models$metaindex = (tcga_models$training_Accuracy + tcga_models$test_Accuracy + tcga_models$valid_Accuracy) / 3
hist(tcga_models$metaindex)
summary(tcga_models$metaindex)
tcga_models = arrange(tcga_models, desc(metaindex))
top_tcga_models = tcga_models[1:100,]
plot(top_tcga_models$training_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy)
scatterplot3d(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy, angle = 45, pch = 16,
grid=TRUE, box=FALSE, xlab = "Trainign Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy")
ourseq_models$metaindex = (ourseq_models$training_Accuracy + ourseq_models$test_Accuracy + ourseq_models$valid_Accuracy) / 3
hist(ourseq_models$metaindex)
summary(ourseq_models$metaindex)
ourseq_models = arrange(ourseq_models, desc(metaindex))
top_ourseq_models = ourseq_models[1:100,]
plot(top_ourseq_models$training_Accuracy, top_ourseq_models$valid_Accuracy)
plot(top_ourseq_models$test_Accuracy, top_ourseq_models$valid_Accuracy)
plot(top_ourseq_models$training_Accuracy, top_ourseq_models$test_Accuracy)
scatterplot3d(top_ourseq_models$training_Accuracy, top_ourseq_models$test_Accuracy, top_ourseq_models$valid_Accuracy, angle = 45, pch = 16,
grid=TRUE, box=FALSE, xlab = "Trainign Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy")
# Which task was easier?
temp = rbind(tcga_models, ourseq_models)
boxplot(temp$metaindex ~ temp$task)
p <- ggplot(temp, aes(x=task, y=metaindex)) +
geom_violin(trim=FALSE, fill="gray") +
labs(title="Which task was easier?",x="Task", y = "Metaindex (avarage accuracy across datasets)")+
geom_boxplot(width=0.1)+
theme_classic()
p
t.test(temp$metaindex ~ temp$task)
wilcox.test(temp$metaindex ~ temp$task)
# In which task top 100 models performed better?
temp = rbind(top_tcga_models, top_ourseq_models)
boxplot(temp$metaindex ~ temp$task)
p <- ggplot(temp, aes(x=task, y=metaindex)) +
geom_violin(trim=FALSE, fill="gray") +
labs(title="Which task was easier?",x="Task", y = "Metaindex (avarage accuracy across datasets)")+
geom_boxplot(width=0.1)+
theme_classic()
p
t.test(temp$metaindex ~ temp$task)
wilcox.test(temp$metaindex ~ temp$task)
# Are there significant differences in the structure of top 100 models between tasks?
temp = rbind(top_tcga_models, top_ourseq_models)
# No layers
temp$no_layers = 1
temp$no_layers[temp$layer2 != 0] = 2
temp$no_layers[temp$layer3 != 0] = 3
hist(temp$no_layers)
table(temp$no_layers, temp$task)
chisq.test(temp$no_layers, temp$task)
table(tcga_models$no_layers)
layer3_ref = 81000/nrow(tcga_models)
prop.test(85, 100, p=layer3_ref)
prop.test(80, 100, p=layer3_ref)
OmicSelector_correlation_plot(temp$no_layers, temp$metaindex, "Number of layers", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
## Layers
OmicSelector_correlation_plot(tcga_models$layer1, tcga_models$metaindex, "Layer 1", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(tcga_models$layer2, tcga_models$metaindex, "Layer 2", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(tcga_models$layer3, tcga_models$metaindex, "Layer 3", "Metaindex", "TCGA: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
boxplot(tcga_models$metaindex ~ tcga_models$layer1)
boxplot(tcga_models$metaindex ~ tcga_models$layer2)
boxplot(tcga_models$metaindex ~ tcga_models$layer3)
OmicSelector_correlation_plot(ourseq_models$layer1, ourseq_models$metaindex, "Layer 1", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(ourseq_models$layer2, ourseq_models$metaindex, "Layer 2", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
OmicSelector_correlation_plot(ourseq_models$layer3, ourseq_models$metaindex, "Layer 3", "Metaindex", "Circulating miRNA-seq: Structure vs. metaindex", yx = F, metoda = "spearman", gdzie_legenda = "bottomleft")
boxplot(ourseq_models$metaindex ~ ourseq_models$layer1)
boxplot(ourseq_models$metaindex ~ ourseq_models$layer2)
boxplot(ourseq_models$metaindex ~ ourseq_models$layer3)
boxplot(temp$layer1 ~ temp$task)
t.test(temp$layer1 ~ temp$task)
boxplot(temp$layer2 ~ temp$task)
t.test(temp$layer2 ~ temp$task)
boxplot(temp$layer3 ~ temp$task)
t.test(temp$layer3 ~ temp$task)
## Activation function
table(temp$activation_function_layer1, temp$task)
chisq.test(temp$activation_function_layer1, temp$task)
table(temp$activation_function_layer2, temp$task)
chisq.test(temp$activation_function_layer2, temp$task)
table(temp$activation_function_layer3, temp$task)
chisq.test(temp$activation_function_layer3, temp$task)
## Dropout
boxplot(tcga_models$metaindex ~ tcga_models$dropout_layer1)
t.test(tcga_models$metaindex ~ tcga_models$dropout_layer1)
boxplot(ourseq_models$metaindex ~ ourseq_models$dropout_layer1)
t.test(ourseq_models$metaindex ~ ourseq_models$dropout_layer1)
table(temp$dropout_layer1, temp$task)
chisq.test(temp$dropout_layer1, temp$task)
## Regularization
boxplot(tcga_models$metaindex ~ tcga_models$layer1_regularizer)
t.test(tcga_models$metaindex ~ tcga_models$layer1_regularizer)
boxplot(ourseq_models$metaindex ~ ourseq_models$layer1_regularizer)
t.test(ourseq_models$metaindex ~ ourseq_models$layer1_regularizer)
table(temp$task, temp$layer1_regularizer)
chisq.test(temp$task, temp$layer1_regularizer)
## Optimizer
boxplot(tcga_models$metaindex ~ tcga_models$optimizer)
a = aov(tcga_models$metaindex ~ tcga_models$optimizer)
summary(a)
TukeyHSD(a)
boxplot(ourseq_models$metaindex ~ ourseq_models$optimizer)
psych::describeBy(ourseq_models$metaindex, ourseq_models$optimizer, digits = 4, mat = T)
a = aov(ourseq_models$metaindex ~ ourseq_models$optimizer)
summary(a)
TukeyHSD(a)
table(temp$task, temp$optimizer)
chisq.test(temp$task, temp$optimizer)
## Scaling
boxplot(tcga_models$metaindex ~ tcga_models$scaled)
t.test(tcga_models$metaindex ~ tcga_models$scaled)
boxplot(ourseq_models$metaindex ~ ourseq_models$scaled)
t.test(ourseq_models$metaindex ~ ourseq_models$scaled)
table(temp$task, temp$scaled)
chisq.test(temp$task, temp$scaled)
# Paired structure analysis
models_paired = top_tcga_models
to_join = ourseq_models[match(top_tcga_models$hyperparameter_id, ourseq_models$hyperparameter_id),] %>% dplyr::rename_all(function(x) paste0("circ_", x))
models_paired = cbind(models_paired, to_join)
suppressMessages(library(PairedData))
suppressMessages(library(profileR))
pd = paired(as.numeric(models_paired$metaindex)[1:50], as.numeric(models_paired$circ_metaindex)[1:50])
colnames(pd) = c("Metaindex TCGA", "Metaindex circulating miRNA-seq")
plot2 = OmicSelector_profileplot(pd, Method.id = models_paired$name[1:50], standardize = F)
plot2
# Transfer network performance analysis
temp1 = data.table::fread("transfered_networks_structure.csv")
temp1$transfer_type = "structure"
temp2 = data.table::fread("transfered_networks_layer1.csv")
temp2$transfer_type = "layer1"
temp3 = data.table::fread("transfered_networks_layer2.csv")
temp3$transfer_type = "layer2"
temp4 = data.table::fread("transfered_networks_layer3.csv")
temp4$transfer_type = "layer3"
transfer_performance = rbind(temp1, temp2, temp3, temp4)
# meta = apply(expand.grid(transfer_performance$transfer_type, transfer_performance$type), 1, paste, collapse="_")
transfer_performance$meta = paste0(transfer_performance$transfer_type, "_", transfer_performance$type)
transfer_performance = transfer_performance[transfer_performance$meta != "layer1_crude",]
transfer_performance = transfer_performance[transfer_performance$meta != "layer2_crude",]
transfer_performance = transfer_performance[transfer_performance$meta != "layer3_crude",]
transfer_performance$meta = as.factor(transfer_performance$meta)
transfer_performance$meta = factor(transfer_performance$meta, levels = c("structure_crude", "layer1_recalibrated", "layer2_recalibrated", "layer3_recalibrated", "structure_recalibrated"))
transfer_performance$meta
transfer_performance$metaindex = (transfer_performance$new_training_Accuracy + transfer_performance$new_test_Accuracy + transfer_performance$new_valid_Accuracy) / 3
boxplot(transfer_performance$metaindex ~ transfer_performance$meta)
oneway.test(transfer_performance$metaindex ~ transfer_performance$meta)
a = aov(transfer_performance$metaindex ~ transfer_performance$meta)
TukeyHSD(a)
psych::describeBy(transfer_performance$metaindex, transfer_performance$meta, mat = T, digits = 4)
merged = merge(transfer_performance, ourseq_models,
by = c('layer1','layer2','layer3','activation_function_layer1','activation_function_layer2','activation_function_layer3','dropout_layer1','dropout_layer2','dropout_layer3','layer1_regularizer','layer2_regularizer','layer3_regularizer','optimizer','autoencoder','balanced', 'scaled'),
suffixes = c("_TCGA", "_circ"), all.x = TRUE)
colnames(merged)
merged$metaindex_circ
merged$metaindex = (merged$new_training_Accuracy + merged$new_test_Accuracy + merged$new_valid_Accuracy) / 3
# merged$metaindex11 = (merged$new_test_Accuracy + merged$new_valid_Accuracy) / 2
# merged$metaindex11_circ = (merged$test_Accuracy_circ + merged$valid_Accuracy_circ ) / 2
merged$metaindex11 = (merged$new_test_F1 + merged$new_valid_F1) / 2
merged$metaindex11_circ = (merged$test_F1_circ + merged$valid_F1_circ ) / 2
boxplot(merged$metaindex_circ, merged$metaindex)
t.test(merged$metaindex_circ, merged$metaindex)
boxplot(merged$metaindex_circ[merged$meta == "structure_recalibrated"], merged$metaindex[merged$meta == "structure_recalibrated"])
t.test(merged$metaindex_circ[merged$meta == "structure_recalibrated"], merged$metaindex[merged$meta == "structure_recalibrated"])
# boxplot(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"])
# t.test(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"])
#
# boxplot(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"])
# t.test(merged$metaindex11_circ[merged$meta == "structure_recalibrated"], merged$metaindex11[merged$meta == "structure_recalibrated"])
#
# boxplot(merged$metaindex11_circ, merged$metaindex11)
# t.test(merged$metaindex11_circ, merged$metaindex11)
merged$delta = merged$metaindex - merged$metaindex_circ
a = aov(merged$delta ~ merged$meta)
summary(a)
boxplot(merged$delta ~ merged$meta)
p <- ggplot(merged, aes(x=meta, y=delta)) +
geom_violin(trim=FALSE, fill="gray") +
labs(title="Benefit from transfering the weights and structure",x="Type of transfer learning", y = "Difference in metaindex (avarage accuracy across datasets)")+
geom_boxplot(width=0.1) +
geom_hline(aes(yintercept=0), linetype="dashed") +
ylim(-0.3, 0.3) +
theme_classic()
p
hist(merged$delta)
which(merged$delta>0)
merged$benefit = merged$delta>0
library(lme4)
library(lmerTest)
model = lmer(delta ~ meta + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + layer1_regularizer + optimizer + (1|hyperparameter_id),
data=merged,
REML=TRUE)
anova(model)
confint(model)
summary(model)
library(tidyverse)
broom.mixed::tidy(model, conf.int = TRUE)
if (require("brms") && require("dotwhisker") && require("ggplot2")) {
L <- load(system.file("extdata", "brms_example.rda", package="broom.mixed"))
gg0 <- (broom.mixed::tidy(model)
## disambiguate
%>% mutate(term=ifelse(grepl("sd__(Int",term,fixed=TRUE),
paste(group,term,sep="."),
term))
%>% dwplot
)
gg0 + geom_vline(xintercept=0,lty=2)
}
OmicSelector_correlation_plot(merged$metaindex, merged$delta, "Transfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F)
OmicSelector_correlation_plot(merged$metaindex_circ, merged$delta, "Untransfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F)
OmicSelector_correlation_plot(merged$metaindex_TCGA, merged$delta, "Initial TCGA network metaindex", "Difference in metaindex after training", "Comparison", yx = F)
# linearMod <- glm(benefit ~ as.factor(transfer_type) + layer1 + layer2 + layer3 + as.factor(activation_function_layer1) + as.factor(activation_function_layer2)
# + as.factor(activation_function_layer3) + as.numeric(as.factor(dropout_layer1)) + as.numeric(as.factor(layer1_regularizer)) + as.factor(optimizer),
# data=merged)
# summary(linearMod)
# library(caret)
# library(party)
# library(partykit)
# library(rattle)
# # Arguments: paste0(colnames(transfer_performance), collapse = " + ")
# merged$benefit = as.factor(ifelse(as.factor(as.numeric(as.factor(merged$benefit))) == 1, "No", "Benefit"))
# train_control<- trainControl(method="repeatedcv", number=10, repeats = 10, classProbs=TRUE)
# model = caret::train(benefit ~ transfer_type + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + dropout_layer2 + dropout_layer3 + layer1_regularizer + layer2_regularizer + layer3_regularizer + optimizer + autoencoder + balanced + scaled,
# method = "rpart2", data = merged, trControl=train_control, tuneLength=20, maxdepth = 2)
# model
# fancyRpartPlot(model$finalModel)
# plot(model$finalModel)
# Final approach: the top 100 networks after transfer learning ranked by metaindex - did they benefit?
merged2 = merged[order(merged$metaindex, decreasing = T)]
table(merged2$benefit[1:100])
merged2 = merged2[1:100,]
boxplot(merged2$metaindex_circ, merged2$metaindex)
t.test(merged2$metaindex_circ, merged2$metaindex, paired = T)
hist(merged$delta)
shapiro.test(merged$delta)
hist(merged2$delta)
shapiro.test(merged2$delta)
t = paired(merged2$metaindex_circ, merged2$metaindex)
a = aov(merged2$delta ~ merged2$meta)
summary(a)
boxplot(merged2$delta ~ merged2$meta, ylab = "Difference in metaindex (mean accuracy on training, testing and validation datasets)", xlab = "Method of transfer")
TukeyHSD(a)
p <- ggplot(merged2, aes(x=meta, y=delta)) +
geom_violin(trim=FALSE, fill="gray") +
labs(title="Benefit from transfering the weights and structure",x="Type of transfer learning", y = "Difference in metaindex (avarage accuracy across datasets)")+
geom_boxplot(width=0.1) +
geom_hline(aes(yintercept=0), linetype="dashed") +
ylim(-0.3, 0.3) +
theme_classic()
p
model = lmer(delta ~ -1 + meta + layer1 + layer2 + layer3 + activation_function_layer1 + activation_function_layer2 + activation_function_layer3 + dropout_layer1 + layer1_regularizer + optimizer + (1|hyperparameter_id),
data=merged2)
anova(model)
confint(model)
summary(model)
library(tidyverse)
broom.mixed::tidy(model, conf.int = TRUE)
if (require("brms") && require("dotwhisker") && require("ggplot2")) {
L <- load(system.file("extdata", "brms_example.rda", package="broom.mixed"))
gg0 <- (broom.mixed::tidy(model)
## disambiguate
%>% mutate(term=ifelse(grepl("sd__(Int",term,fixed=TRUE),
paste(group,term,sep="."),
term))
%>% dwplot
)
gg0 + geom_vline(xintercept=0,lty=2) +
theme_classic()
}
OmicSelector_correlation_plot(merged2$metaindex, merged2$delta, "Transfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F)
OmicSelector_correlation_plot(merged2$metaindex_circ, merged2$delta, "Untransfered network metaindex", "Difference in metaindex after training", "Comparison", yx = F)
OmicSelector_correlation_plot(merged2$metaindex_TCGA, merged2$delta, "Initial TCGA network metaindex", "Difference in metaindex after training", "Comparison", yx = F)
# Best network analysis
best_recalibrated = merged[which(merged$metaindex == max(merged$metaindex)),,]
best_circ = ourseq_models[which(ourseq_models$metaindex == max(ourseq_models$metaindex)),]
best_recalibrated
best_circ
# remotes::install_github("kstawiski/OmicSelector", ref = "dev")
setwd("~/snorlax/DOKTORAT/transfer_learning/pancreatic/transfer_1")
library(OmicSelector)
library(dplyr)
library("scatterplot3d")
OmicSelector_load_extension("deeplearning")
# Get TCGA results
tcga_models = data.table::fread("../tcga/pancreatic_tcga.csv")
ourseq_models = data.table::fread("../circulating_our/pancreatic_ourseq.csv")
tcga_models$metaindex = (tcga_models$training_Accuracy + tcga_models$test_Accuracy + tcga_models$valid_Accuracy) / 3
hist(tcga_models$metaindex)
summary(tcga_models$metaindex)
tcga_models = arrange(tcga_models, desc(metaindex))
top_tcga_models = tcga_models[1:100,]
plot(top_tcga_models$training_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy)
plot(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy)
scatterplot3d(top_tcga_models$training_Accuracy, top_tcga_models$test_Accuracy, top_tcga_models$valid_Accuracy, angle = 45, pch = 16,
grid=TRUE, box=FALSE, xlab = "Trainign Accuracy", ylab = "Testing Accuracy", zlab = "Validation Accuracy")
data.table::fwrite(top_tcga_models, "top_tcga_models.csv")
transfered_networks_structure = data.frame()
transfered_networks_layer1 = data.frame()
transfered_networks_layer2 = data.frame()
transfered_networks_layer3 = data.frame()
for(i in 1:100) {
tcga_model_modelfile = paste0("../tcga/models/pancreatic_tcga/pancreatic_tcga_", top_tcga_models$model_id[i], ".zip")
file.exists(tcga_model_modelfile)
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 0, freeze_to = 0,
train = data.table::fread("../circulating_our/mixed_train.csv"),
test = data.table::fread("../circulating_our/mixed_test.csv"),
valid = data.table::fread("../circulating_our/mixed_valid.csv"))
transfered_networks_structure = rbind.fill(transfered_networks_structure, temp2) })
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 1, freeze_to = 1,
train = data.table::fread("../circulating_our/mixed_train.csv"),
test = data.table::fread("../circulating_our/mixed_test.csv"),
valid = data.table::fread("../circulating_our/mixed_valid.csv"))
transfered_networks_layer1 = rbind.fill(transfered_networks_layer1, temp2) })
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 1, freeze_to = 2,
train = data.table::fread("../circulating_our/mixed_train.csv"),
test = data.table::fread("../circulating_our/mixed_test.csv"),
valid = data.table::fread("../circulating_our/mixed_valid.csv"))
transfered_networks_layer2 = rbind.fill(transfered_networks_layer2, temp2) })
try({ temp2 = OmicSelector_transfer_learning_neural_network(model_path = tcga_model_modelfile, freeze_from = 1, freeze_to = 3,
train = data.table::fread("../circulating_our/mixed_train.csv"),
test = data.table::fread("../circulating_our/mixed_test.csv"),
valid = data.table::fread("../circulating_our/mixed_valid.csv"))
transfered_networks_layer3 = rbind.fill(transfered_networks_layer3, temp2) })
if(!dir.exists("transfer_models")) { dir.create("transfer_models") }
file.copy(list.files(".", "\\.zip$"), "transfer_models/")
file.remove(list.files(".", "\\.zip$"))
}
data.table::fwrite(transfered_networks_structure, "transfered_networks_structure.csv")
data.table::fwrite(transfered_networks_layer1, "transfered_networks_layer1.csv")
data.table::fwrite(transfered_networks_layer2, "transfered_networks_layer2.csv")
data.table::fwrite(transfered_networks_layer3, "transfered_networks_layer3.csv")
# -*- coding: utf-8 -*-
suppressMessages(library(plyr))
suppressMessages(library(dplyr))
suppressMessages(library(caret))
suppressMessages(library(epiDisplay))
suppressMessages(library(pROC))
suppressMessages(library(ggplot2))
suppressMessages(library(DMwR))
suppressMessages(library(ROSE))
suppressMessages(library(gridExtra))
suppressMessages(library(gplots))
suppressMessages(library(devtools))
suppressMessages(library(stringr))
suppressMessages(library(data.table))
suppressMessages(library(tidyverse))
library(OmicSelector)
suppressMessages(library(funModeling))
message("OmicSelector: DeepLearning extension loaded.")
if(!dir.exists(paste0("models"))) { dir.create(paste0("models")) }
if(!dir.exists(paste0("temp"))) { dir.create(paste0("temp")) }
OmicSelector_keras_create_model <- function(i, hyperparameters, how_many_features = ncol(x_train_scale)) {
# tempmodel <- keras_model_sequential() %>%
# { if(hyperparameters[i,10]==T) { layer_dense(. , units = hyperparameters[i,1], kernel_regularizer = regularizer_l2(l = 0.001),
# activation = hyperparameters[i,4], input_shape = c(ncol(x_train_scale))) } else
# { layer_dense(. , units = hyperparameters[i,1], activation = hyperparameters[i,4],
# input_shape = c(ncol(x_train_scale))) } } %>%
# { if(hyperparameters[i,7]>0) { layer_dropout(. , rate = hyperparameters[i,7]) } else { . } } %>%
# { if(hyperparameters[i,2]>0) {
# if(hyperparameters[i,11]==T) { layer_dense(. , units = hyperparameters[i,2], activation = hyperparameters[i,5],
# kernel_regularizer = regularizer_l2(l = 0.001)) } else {
# layer_dense(units = hyperparameters[i,2], activation = hyperparameters[i,5]) } } } %>%
# { if(hyperparameters[i,8]>0) { layer_dropout(rate = hyperparameters[i,8]) } else { . } } %>%
# { if(hyperparameters[i,3]>0) {
# if(hyperparameters[i,12]==T) { layer_dense(units = hyperparameters[i,3], activation = hyperparameters[i,6],
# kernel_regularizer = regularizer_l2(l = 0.001)) } else
# {layer_dense(units = hyperparameters[i,3], activation = hyperparameters[i,6])} } else { . } } %>%
# { if(hyperparameters[i,9]>0) { layer_dropout(rate = hyperparameters[i,9]) } else { . } } %>%
# layer_dense(units = 1, activation = 'sigmoid')
library(keras)
tempmodel <- keras_model_sequential()
if(hyperparameters[i,10]==T) { layer_dense(tempmodel , units = hyperparameters[i,1], kernel_regularizer = regularizer_l1(l = 0.001),
activation = hyperparameters[i,4], input_shape = c(how_many_features)) } else
{ layer_dense(tempmodel , units = hyperparameters[i,1], activation = hyperparameters[i,4],
input_shape = c(how_many_features)) }
if(hyperparameters[i,7]>0) { layer_dropout(tempmodel , rate = hyperparameters[i,7]) }
if(hyperparameters[i,2]>0) {
if(hyperparameters[i,11]==T) { layer_dense(tempmodel , units = hyperparameters[i,2], activation = hyperparameters[i,5],
kernel_regularizer = regularizer_l1(l = 0.001)) } else
{layer_dense(tempmodel, units = hyperparameters[i,2], activation = hyperparameters[i,5]) } }
if(hyperparameters[i,2]>0 & hyperparameters[i,8]>0) { layer_dropout(tempmodel, rate = hyperparameters[i,8]) }
if(hyperparameters[i,3]>0) {
if(hyperparameters[i,12]==T) { layer_dense(tempmodel, units = hyperparameters[i,3], activation = hyperparameters[i,6],
kernel_regularizer = regularizer_l1(l = 0.001)) } else
{ layer_dense(tempmodel, units = hyperparameters[i,3], activation = hyperparameters[i,6])} }
if(hyperparameters[i,3]>0 & hyperparameters[i,9]>0) { layer_dropout(tempmodel, rate = hyperparameters[i,9]) }
layer_dense(tempmodel, units = 2, activation = 'softmax')
print(tempmodel)
dnn_class_model = keras::compile(tempmodel, optimizer = hyperparameters[i,13],
loss = 'binary_crossentropy',
metrics = 'accuracy')
}
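# Usage sketch (illustrative only, kept as a comment so it is not executed when this extension
# is sourced; it assumes the 17-column hyperparameter layout used throughout this file:
# columns 1-3 layer sizes, 4-6 activation functions, 7-9 dropout rates, 10-12 regularizer
# flags, 13 optimizer, 14 autoencoder, 15 balanced, 16 formula, 17 scaled):
# hp_example = data.frame(layer1 = 6, layer2 = 0, layer3 = 0,
#   activation_function_layer1 = "relu", activation_function_layer2 = "relu", activation_function_layer3 = "relu",
#   dropout_layer1 = 0.1, dropout_layer2 = 0, dropout_layer3 = 0,
#   layer1_regularizer = FALSE, layer2_regularizer = FALSE, layer3_regularizer = FALSE,
#   optimizer = "adam", autoencoder = 0, balanced = FALSE, formula = ".", scaled = TRUE,
#   stringsAsFactors = FALSE)
# example_model = OmicSelector_keras_create_model(1, hyperparameters = hp_example, how_many_features = 10)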
# the autoencoder has softmax on the deep-feature layer
# if the autoencoder variable is > 0 we build a 3-layer autoencoder; if < 0 we build a 3-layer autoencoder with regularization (sparse autoencoder)
OmicSelector_deep_learning = function(selected_miRNAs = ".", wd = getwd(),
SMOTE = F, keras_batch_size = 64, clean_temp_files = T,
save_threshold_trainacc = 0.85, save_threshold_testacc = 0.8, keras_epochae = 5000,
keras_epoch = 2000, keras_patience = 50,
hyperparameters = expand.grid(layer1 = seq(3,11, by = 2), layer2 = c(0,seq(3,11, by = 2)), layer3 = c(0,seq(3,11, by = 2)),
activation_function_layer1 = c("relu","sigmoid"), activation_function_layer2 = c("relu","sigmoid"), activation_function_layer3 = c("relu","sigmoid"),
dropout_layer1 = c(0, 0.1), dropout_layer2 = c(0, 0.1), dropout_layer3 = c(0),
layer1_regularizer = c(T,F), layer2_regularizer = c(T,F), layer3_regularizer = c(T,F),
optimizer = c("adam","rmsprop","sgd"), autoencoder = c(0,7,-7), balanced = SMOTE, formula = as.character(OmicSelector_create_formula(selected_miRNAs))[3], scaled = c(T,F),
stringsAsFactors = F), add_features_to_predictions = F,
keras_threads = ceiling(parallel::detectCores()/2), start = 1, end = nrow(hyperparameters), output_file = "deeplearning_results.csv", save_all_vars = F, automatic_weight = F)
{
# library(OmicSelector)
# OmicSelector_load_extension("deeplearning")
codename = sub(pattern = "(.*)\\..*$", replacement = "\\1", basename(output_file))
#options(warn=-1)
oldwd = getwd()
setwd(wd)
set.seed(1)
if(dir.exists("/OmicSelector")) {
if(!dir.exists("/OmicSelector/temp")) { dir.create("/OmicSelector/temp") }
temp_dir = "/OmicSelector/temp/"
} else {
temp_dir = tempdir()
}
if(!dir.exists("temp")) { dir.create("temp") }
if(!dir.exists("models")) { dir.create("models") }
options(bitmapType = 'cairo', device = 'png')
library(plyr)
library(dplyr)
library(keras)
library(foreach)
library(doParallel)
#library(doSNOW)
library(data.table)
fwrite(hyperparameters, paste0("hyperparameters_",output_file))
dane = OmicSelector_load_datamix(wd = wd, replace_smote = F); train = dane[[1]]; test = dane[[2]]; valid = dane[[3]]; train_smoted = dane[[4]]; trainx = dane[[5]]; trainx_smoted = dane[[6]]
if (SMOTE == T) { train = train_smoted }
message("Checkpoint passed: load lib and data")
#cores=detectCores()
cat(paste0("\nTemp dir: ", temp_dir, "\n"))
cat("\nStarting preparing cluster..\n")
#cl <- makePSOCKcluster(keras_threads) #not to overload your computer
cl = makeCluster(keras_threads, outfile=paste0("temp/", ceiling(as.numeric(Sys.time())), "deeplearning_cluster.log"))
registerDoParallel(cl)
on.exit(stopCluster(cl))
cat("\nCluster prepared..\n")
#message("Checkpoint passed: cluster prepared")
#args= names(mget(ls()))
#export = export[!export %in% args]
# the iteration goes here
cat(paste0("\nStarting parallel loop.. There are: ", end-start+1, " hyperparameter sets to be checked.\n"))
final <- foreach(i=as.numeric(start):as.numeric(end), .combine=rbind, .verbose=F, .inorder=F, .export = ls()
#,.packages = loadedNamespaces()
) %dopar% {
Sys.setenv(TF_FORCE_GPU_ALLOW_GROWTH = 'true')
library(OmicSelector)
OmicSelector_load_extension("deeplearning")
if(!dir.exists(paste0(temp_dir,"/models"))) { dir.create(paste0(temp_dir,"/models")) }
if(!dir.exists(paste0(temp_dir,"/temp"))) { dir.create(paste0(temp_dir,"/temp")) }
start_time <- Sys.time()
library(keras)
library(ggplot2)
library(dplyr)
library(data.table)
set.seed(1)
# library(tensorflow)
# gpu = tf$test$is_gpu_available()
# if(gpu) {
# gpux <- tf$config$experimental$get_visible_devices('GPU')[[1]]
# tf$config$experimental$set_memory_growth(device = gpux, enable = TRUE)
# py_run_string("gpus = tf.config.experimental.list_physical_devices('GPU')")
# py_run_string("tf.config.experimental.set_virtual_device_configuration(gpus[0],[tf.config.experimental.VirtualDeviceConfiguration(memory_limit=2048)])")
# }
cat("\nStarting hyperparameters..\n")
print(hyperparameters[i,])
message(paste0("OmicSelector: Starting training network no ", i, "."))
message(paste0(hyperparameters[i,], collapse = ", "))
options(bitmapType = 'cairo', device = 'png')
model_id = paste0(format(i, scientific = FALSE), "-", ceiling(as.numeric(Sys.time())))
if(SMOTE == T) { model_id = paste0(format(i, scientific = FALSE), "-SMOTE-", ceiling(as.numeric(Sys.time()))) }
tempwyniki = data.frame(model_id=model_id)
tempwyniki[1, "model_id"] = model_id
#message("Checkpoint passed: chunk 1")
if(!dir.exists(paste0(temp_dir,"/models"))) { dir.create(paste0(temp_dir,"/models"))}
if(!dir.exists(paste0(temp_dir,"/models/keras",model_id))) { dir.create(paste0(temp_dir,"/models/keras",model_id))}
cat(paste0("\nTraining model: ",temp_dir,"/models/keras",model_id,"\n"))
#message("Checkpoint passed: chunk 2")
#pdf(paste0(temp_dir,"/models/keras",model_id,"/plots.pdf"), paper="a4")
con <- file(paste0(temp_dir,"/models/keras",model_id,"/training.log"))
sink(con, append=TRUE, split =TRUE)
sink(con, append=TRUE, type="message")
early_stop <- callback_early_stopping(monitor = "val_loss", mode="min", patience = keras_patience)
cp_callback <- callback_model_checkpoint(
filepath = paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"),
save_best_only = TRUE, period = 10, monitor = "val_loss",
verbose = 0
)
ae_cp_callback <- callback_model_checkpoint(
filepath = paste0(temp_dir,"/models/keras",model_id,"/autoencoderweights.hdf5"),
save_best_only = TRUE, save_weights_only = T, period = 10, monitor = "val_loss",
verbose = 0
)
x_train <- train %>%
{ if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>%
as.matrix()
y_train <- train %>%
dplyr::select("Class") %>%
as.matrix()
y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0)
x_test <- test %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_test <- test %>%
dplyr::select("Class") %>%
as.matrix()
y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0)
x_valid <- valid %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_valid <- valid %>%
dplyr::select("Class") %>%
as.matrix()
y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0)
#message("Checkpoint passed: chunk 3")
if(automatic_weight) {
counter=funModeling::freq(to_categorical(y_train), plot=F) %>% select(var, frequency)
majority=max(counter$frequency)
counter$weight=ceiling(majority/counter$frequency)
l_weights=setNames(as.list(counter$weight), counter$var)
message(l_weights)
} else {
l_weights = NULL
}
if(hyperparameters[i, 17] == T) {
x_train_scale = x_train %>% scale()
col_mean_train <- attr(x_train_scale, "scaled:center")
saveRDS(col_mean_train, paste0(temp_dir,"/models/keras",model_id,"/col_mean_train.RDS"))
col_sd_train <- attr(x_train_scale, "scaled:scale")
saveRDS(col_sd_train, paste0(temp_dir,"/models/keras",model_id,"/col_sd_train.RDS"))
x_test_scale <- x_test %>%
scale(center = col_mean_train,
scale = col_sd_train)
x_valid_scale <- x_valid %>%
scale(center = col_mean_train,
scale = col_sd_train)
}
else {
x_train_scale = x_train
x_test_scale <- x_test
x_valid_scale <- x_valid
}
input_layer <- layer_input(shape = c(ncol(x_train_scale)))
#message("Checkpoint passed: chunk 4")
#psych::describe(x_test_scale)
# use an autoencoder?
if(hyperparameters[i,14] != 0) {
n1 <- hyperparameters[i,14]
n3 <- ncol(x_train_scale)
n2 = 7
if(ceiling(n3/2) > 7) { n2 = ceiling(n3/2) }
if (hyperparameters[i,14]>0) {
encoder <-
input_layer %>%
layer_dense(units = n2, activation = hyperparameters[i,6]) %>%
layer_dense(units = n1, activation = "softmax") # dimensions of final encoding layer
decoder <- encoder %>%
layer_dense(units = n2, activation = hyperparameters[i,6]) %>%
layer_dense(units = n3, activation = hyperparameters[i,6]) # dimension of original variable
}
else {
n1 = -n1 # correction for the negative value in the hyperparameters (sparse autoencoder)
encoder <-
input_layer %>%
layer_dense(units = n2, activation = hyperparameters[i,6], kernel_regularizer = regularizer_l1(l = 0.01)) %>%
layer_dense(units = n1, activation = "softmax", kernel_regularizer = regularizer_l1(l = 0.01)) # dimensions of final encoding layer
decoder <- encoder %>%
layer_dense(units = n2, activation = hyperparameters[i,6], kernel_regularizer = regularizer_l1(l = 0.01)) %>%
layer_dense(units = n3, activation = hyperparameters[i,6]) # dimension of original variable
}
ae_model <- keras_model(inputs = input_layer, outputs = decoder)
ae_model
ae_model %>%
keras::compile(loss = "mean_absolute_error",
optimizer = hyperparameters[i,13],
metrics = c("mean_squared_error"))
summary(ae_model)
#message("Checkpoint passed: chunk 5")
#ae_tempmodelfile = tempfile()
ae_history <- fit(ae_model, x = x_train_scale,
y = x_train_scale,
epochs = keras_epochae, batch_size = keras_batch_size,
shuffle = T, verbose = 0,
view_metrics = FALSE,
validation_data = list(x_test_scale, x_test_scale),
callbacks = list(ae_cp_callback, early_stop))
#file.copy(ae_tempmodelfile, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"), overwrite = T)
#ae_history_df <- as.data.frame(ae_history)
#fwrite(ae_history_df, paste0(temp_dir,"/models/keras",model_id,"/autoencoder_history_df.csv.gz"))
saveRDS(ae_history, paste0(temp_dir,"/models/keras",model_id,"/ae_history.RDS"))
compare_cx <- data.frame(
train_loss = ae_history$metrics$loss,
test_loss = ae_history$metrics$val_loss
) %>%
tibble::rownames_to_column() %>%
mutate(rowname = as.integer(rowname)) %>%
tidyr::gather(key = "type", value = "value", -rowname)
plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) +
geom_line() +
xlab("epoch") +
ylab("loss") +
theme_bw()
#message("Checkpoint passed: chunk 6")
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_autoencoder.png"), grid.arrange(plot1, nrow =1, top = "Training of autoencoder"))
cat("\n- Reloading autoencoder to get weights...\n")
encoder_model <- keras_model(inputs = input_layer, outputs = encoder)
encoder_model %>% load_model_weights_hdf5(paste0(temp_dir,"/models/keras",model_id,"/autoencoderweights.hdf5"),
skip_mismatch = T,
by_name = T)
cat("\n- Autoencoder saving...\n")
save_model_hdf5(encoder_model, paste0(temp_dir,"/models/keras",model_id,"/autoencoder.hdf5"))
cat("\n- Creating deep features...\n")
#message("Checkpoint passed: chunk 7")
ae_x_train_scale <- encoder_model %>%
predict(x_train_scale) %>%
as.matrix()
fwrite(ae_x_train_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_train.csv"))
ae_x_test_scale <- encoder_model %>%
predict(x_test_scale) %>%
as.matrix()
fwrite(ae_x_test_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_test.csv"))
ae_x_valid_scale <- encoder_model %>%
predict(x_valid_scale) %>%
as.matrix()
fwrite(ae_x_valid_scale, paste0(temp_dir,"/models/keras",model_id,"/deepfeatures_valid.csv"))
# # swap in the deep features so the code below does not need editing
x_train_scale = as.matrix(ae_x_train_scale)
x_test_scale = as.matrix(ae_x_test_scale)
x_valid_scale = as.matrix(ae_x_valid_scale)
cat("\n- Training model based on deep features...\n")
dnn_class_model <- OmicSelector_keras_create_model(i, hyperparameters = hyperparameters, how_many_features = ncol(x_train_scale))
#message("Checkpoint passed: chunk 8")
message("Starting training...")
#tempmodelfile = tempfile()
history <- fit(dnn_class_model, x = x_train_scale,
y = to_categorical(y_train),
epochs = keras_epoch,
validation_data = list(x_test_scale, to_categorical(y_test)),
callbacks = list(cp_callback, early_stop),
verbose = 0,
view_metrics = FALSE,
batch_size = keras_batch_size, shuffle = T, class_weight = l_weights)
message(history)
print(history)
#message("Checkpoint passed: chunk 8")
#plot(history, col="black")
#history_df <- as.data.frame(history)
# fwrite(history_df, paste0(temp_dir,"/models/keras",model_id,"/history_df.csv.gz"))
saveRDS(history, paste0(temp_dir,"/models/keras",model_id,"/history.RDS"))
#message("Checkpoint passed: chunk 9")
cat("\n- Saving history and plots...\n")
compare_cx <- data.frame(
train_loss = history$metrics$loss,
test_loss = history$metrics$val_loss
) %>%
tibble::rownames_to_column() %>%
mutate(rowname = as.integer(rowname)) %>%
tidyr::gather(key = "type", value = "value", -rowname)
plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) +
geom_line() +
xlab("Epoch") +
ylab("Loss") +
theme_bw()
#message("Checkpoint passed: chunk 10")
compare_cx <- data.frame(
train_accuracy = history$metrics$accuracy,
test_accuracy = history$metrics$val_accuracy
) %>%
tibble::rownames_to_column() %>%
mutate(rowname = as.integer(rowname)) %>%
tidyr::gather(key = "type", value = "value", -rowname)
plot2 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) +
geom_line() +
xlab("Epoch") +
ylab("Accuracy") +
theme_bw()
#message("Checkpoint passed: chunk 12")
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training.png"), grid.arrange(plot1, plot2, nrow =2, top = "Training of final neural network"))
# evaluation
cat("\n- Saving final model...\n")
dnn_class_model = load_model_hdf5(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"))
y_train_pred <- predict(object = dnn_class_model, x = x_train_scale)
y_test_pred <- predict(object = dnn_class_model, x = x_test_scale)
y_valid_pred <- predict(object = dnn_class_model, x = x_valid_scale)
#message("Checkpoint passed: chunk 13")
# select the cutoff
pred = data.frame(`Class` = train$Class, `Pred` = y_train_pred)
library(cutpointr)
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden)
summary(cutoff)
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/cutoff.png"), plot(cutoff))
wybrany_cutoff = cutoff$optimal_cutpoint
#wyniki[i, "training_AUC"] = cutoff$AUC
tempwyniki[1, "training_AUC"] = cutoff$AUC
#wyniki[i, "cutoff"] = wybrany_cutoff
tempwyniki[1, "cutoff"] = wybrany_cutoff
cat(paste0("\n\n---- TRAINING AUC: ",cutoff$AUC," ----\n\n"))
cat(paste0("\n\n---- OPTIMAL CUTOFF: ",wybrany_cutoff," ----\n\n"))
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n"))
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_train)
#message("Checkpoint passed: chunk 14")
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred)
tempwyniki[1, "training_AUC2"] = t1_roc$auc
tempwyniki[1, "training_AUC_lower95CI"] = as.character(ci(t1_roc))[1]
tempwyniki[1, "training_AUC_upper95CI"] = as.character(ci(t1_roc))[3]
saveRDS(t1_roc, paste0(temp_dir,"/models/keras",model_id,"/training_ROC.RDS"))
#message("Checkpoint passed: chunk 15")
tempwyniki[1, "training_Accuracy"] = cm_train$overall[1]
tempwyniki[1, "training_Sensitivity"] = cm_train$byClass[1]
tempwyniki[1, "training_Specificity"] = cm_train$byClass[2]
tempwyniki[1, "training_PPV"] = cm_train$byClass[3]
tempwyniki[1, "training_NPV"] = cm_train$byClass[4]
tempwyniki[1, "training_F1"] = cm_train$byClass[7]
saveRDS(cm_train, paste0(temp_dir,"/models/keras",model_id,"/cm_train.RDS"))
#message("Checkpoint passed: chunk 16")
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = test$Class, `Pred` = y_test_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_test)
tempwyniki[1, "test_Accuracy"] = cm_test$overall[1]
tempwyniki[1, "test_Sensitivity"] = cm_test$byClass[1]
tempwyniki[1, "test_Specificity"] = cm_test$byClass[2]
tempwyniki[1, "test_PPV"] = cm_test$byClass[3]
tempwyniki[1, "test_NPV"] = cm_test$byClass[4]
tempwyniki[1, "test_F1"] = cm_test$byClass[7]
saveRDS(cm_test, paste0(temp_dir,"/models/keras",model_id,"/cm_test.RDS"))
#message("Checkpoint passed: chunk 17")
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = valid$Class, `Pred` = y_valid_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_valid)
tempwyniki[1, "valid_Accuracy"] = cm_test$overall[1]
tempwyniki[1, "valid_Sensitivity"] = cm_test$byClass[1]
tempwyniki[1, "valid_Specificity"] = cm_test$byClass[2]
tempwyniki[1, "valid_PPV"] = cm_test$byClass[3]
tempwyniki[1, "valid_NPV"] = cm_test$byClass[4]
tempwyniki[1, "valid_F1"] = cm_test$byClass[7]
saveRDS(cm_valid, paste0(temp_dir,"/models/keras",model_id,"/cm_valid.RDS"))
#message("Checkpoint passed: chunk 18")
if(add_features_to_predictions) {
mix = rbind(train,test,valid)
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale)
y_mixx_pred <- predict(object = dnn_class_model, x = mixx)
mix$Podzial = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid)))
mix$Pred = y_mixx_pred[,2]
mix$PredClass = ifelse(mix$Pred >= wybrany_cutoff, "Cancer", "Control")
fwrite(mix, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) } else {
mix = rbind(train,test,valid)
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale)
y_mixx_pred <- predict(object = dnn_class_model, x = mixx)
mix2 = data.frame(
`Podzial` = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))),
`Pred` = y_mixx_pred[,2],
`PredClass` = ifelse(y_mixx_pred[,2] >= wybrany_cutoff, "Cancer", "Control")
)
fwrite(mix2, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz"))
}
fwrite(cbind(hyperparameters[i,], tempwyniki), paste0(temp_dir,"/models/keras",model_id,"/wyniki.csv"))
#message("Checkpoint passed: chunk 19")
wagi = get_weights(dnn_class_model)
saveRDS(wagi, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.RDS"))
save_model_weights_hdf5(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.hdf5"))
saveRDS(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.RDS"))
#message("Checkpoint passed: chunk 20")
# is the model worth saving?
#sink()
#sink(type="message")
message(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc))
cat(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc))
if(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) {
# save the model to the proper directory
if (save_all_vars) { save(list = ls(all=TRUE), file = paste0(temp_dir,"/models/keras",model_id,"/all.Rdata.gz"), compress = "gzip", compression_level = 9) }
if(!dir.exists(paste0("models/",codename,"/"))) { dir.create(paste0("models/",codename,"/")) }
if(dir.exists("/OmicSelector")) {
system(paste0("/bin/bash -c 'screen -dmS save_network zip -9 -r ", paste0("models/",codename,"/",codename, "_", model_id,".zip") ," ", paste0(temp_dir,"models/keras",model_id), "/'"))
} else {
zip(paste0("models/",codename,"/",codename, "_", model_id,".zip"),list.files(paste0(temp_dir,"models/keras",model_id), full.names = T, recursive = T, include.dirs = T)) }
# file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "_wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("temp/",codename,"_",model_id,"_deeplearningresults.csv"))
if (!dir.exists(paste0("temp/",codename,"/"))) { dir.create(paste0("temp/",codename,"/")) }
file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("temp/",codename,"/",model_id,"_deeplearningresults.csv"))
#message("Checkpoint passed: chunk 21")
#dev.off()
}
} else {
x_train <- train %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_train <- train %>%
dplyr::select("Class") %>%
as.matrix()
y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0)
#message("Checkpoint passed: chunk 22")
x_test <- test %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_test <- test %>%
dplyr::select("Class") %>%
as.matrix()
y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0)
x_valid <- valid %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_valid <- valid %>%
dplyr::select("Class") %>%
as.matrix()
y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0)
#message("Checkpoint passed: chunk 23")
if(hyperparameters[i, 17] == T) {
x_train_scale = x_train %>% scale()
col_mean_train <- attr(x_train_scale, "scaled:center")
col_sd_train <- attr(x_train_scale, "scaled:scale")
x_test_scale <- x_test %>%
scale(center = col_mean_train,
scale = col_sd_train)
x_valid_scale <- x_valid %>%
scale(center = col_mean_train,
scale = col_sd_train)
}
else {
x_train_scale = x_train
x_test_scale <- x_test
x_valid_scale <- x_valid
}
#message("Checkpoint passed: chunk 24")
dnn_class_model <- OmicSelector_keras_create_model(i, hyperparameters = hyperparameters, how_many_features = ncol(x_train_scale))
message("Starting training...")
history <- fit(dnn_class_model, x = x_train_scale,
y = to_categorical(y_train),
epochs = keras_epoch,
validation_data = list(x_test_scale, to_categorical(y_test)),
callbacks = list(
cp_callback,
#callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.1),
#callback_model_checkpoint(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")),
early_stop),
verbose = 0,
view_metrics = FALSE,
batch_size = keras_batch_size, shuffle = T, class_weight = l_weights)
print(history)
#message("Checkpoint passed: chunk 25")
message(history)
#plot(history, col="black")
saveRDS(history, paste0(temp_dir,"/models/keras",model_id,"/history.RDS"))
#message("Checkpoint passed: chunk 26")
#fwrite(history_df, paste0(temp_dir,"/models/keras",model_id,"/history_df.csv.gz"))
# pdf(paste0(temp_dir,"/models/keras",model_id,"/plots.pdf"))
compare_cx <- data.frame(
train_loss = history$metrics$loss,
test_loss = history$metrics$val_loss
) %>%
tibble::rownames_to_column() %>%
mutate(rowname = as.integer(rowname)) %>%
tidyr::gather(key = "type", value = "value", -rowname)
plot1 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) +
geom_line() +
xlab("epoch") +
ylab("loss") +
theme_bw()
#message("Checkpoint passed: chunk 27")
compare_cx <- data.frame(
train_accuracy = history$metrics$accuracy,
test_accuracy = history$metrics$val_accuracy
) %>%
tibble::rownames_to_column() %>%
mutate(rowname = as.integer(rowname)) %>%
tidyr::gather(key = "type", value = "value", -rowname)
plot2 = ggplot(compare_cx, aes(x = rowname, y = value, color = type)) +
geom_line() +
xlab("epoch") +
ylab("loss") +
theme_bw()
#message("Checkpoint passed: chunk 28")
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training.png"), grid.arrange(plot1, plot2, nrow =2, top = "Training of final neural network"))
# evaluation
dnn_class_model = load_model_hdf5(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5"))
y_train_pred <- predict(object = dnn_class_model, x = x_train_scale)
y_test_pred <- predict(object = dnn_class_model, x = x_test_scale)
y_valid_pred <- predict(object = dnn_class_model, x = x_valid_scale)
#message("Checkpoint passed: chunk 29")
# select the cutoff
pred = data.frame(`Class` = train$Class, `Pred` = y_train_pred)
library(cutpointr)
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden)
print(summary(cutoff))
ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/cutoff.png"), plot(cutoff))
wybrany_cutoff = cutoff$optimal_cutpoint
#wyniki[i, "training_AUC"] = cutoff$AUC
tempwyniki[1, "training_AUC"] = cutoff$AUC
#wyniki[i, "cutoff"] = wybrany_cutoff
tempwyniki[1, "cutoff"] = wybrany_cutoff
#message("Checkpoint passed: chunk 30")
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n"))
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_train)
#message("Checkpoint passed: chunk 31")
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred)
tempwyniki[1, "training_AUC2"] = t1_roc$auc
tempwyniki[1, "training_AUC_lower95CI"] = as.character(ci(t1_roc))[1]
tempwyniki[1, "training_AUC_upper95CI"] = as.character(ci(t1_roc))[3]
saveRDS(t1_roc, paste0(temp_dir,"/models/keras",model_id,"/training_ROC.RDS"))
#ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve"))
#message("Checkpoint passed: chunk 32")
tempwyniki[1, "training_Accuracy"] = cm_train$overall[1]
tempwyniki[1, "training_Sensitivity"] = cm_train$byClass[1]
tempwyniki[1, "training_Specificity"] = cm_train$byClass[2]
tempwyniki[1, "training_PPV"] = cm_train$byClass[3]
tempwyniki[1, "training_NPV"] = cm_train$byClass[4]
tempwyniki[1, "training_F1"] = cm_train$byClass[7]
saveRDS(cm_train, paste0(temp_dir,"/models/keras",model_id,"/cm_train.RDS"))
#message("Checkpoint passed: chunk 33")
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = test$Class, `Pred` = y_test_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_test)
tempwyniki[1, "test_Accuracy"] = cm_test$overall[1]
tempwyniki[1, "test_Sensitivity"] = cm_test$byClass[1]
tempwyniki[1, "test_Specificity"] = cm_test$byClass[2]
tempwyniki[1, "test_PPV"] = cm_test$byClass[3]
tempwyniki[1, "test_NPV"] = cm_test$byClass[4]
tempwyniki[1, "test_F1"] = cm_test$byClass[7]
saveRDS(cm_test, paste0(temp_dir,"/models/keras",model_id,"/cm_test.RDS"))
#message("Checkpoint passed: chunk 34")
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = valid$Class, `Pred` = y_valid_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_valid)
tempwyniki[1, "valid_Accuracy"] = cm_valid$overall[1]
tempwyniki[1, "valid_Sensitivity"] = cm_valid$byClass[1]
tempwyniki[1, "valid_Specificity"] = cm_valid$byClass[2]
tempwyniki[1, "valid_PPV"] = cm_valid$byClass[3]
tempwyniki[1, "valid_NPV"] = cm_valid$byClass[4]
tempwyniki[1, "valid_F1"] = cm_valid$byClass[7]
saveRDS(cm_valid, paste0(temp_dir,"/models/keras",model_id,"/cm_valid.RDS"))
#message("Checkpoint passed: chunk 35")
if(add_features_to_predictions) {
mix = rbind(train,test,valid)
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale)
y_mixx_pred <- predict(object = dnn_class_model, x = mixx)
mix$Podzial = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid)))
mix$Pred = y_mixx_pred[,2]
mix$PredClass = ifelse(mix$Pred >= wybrany_cutoff, "Cancer", "Control")
fwrite(mix, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz")) } else {
mix = rbind(train,test,valid)
mixx = rbind(x_train_scale, x_test_scale, x_valid_scale)
y_mixx_pred <- predict(object = dnn_class_model, x = mixx)
mix2 = data.frame(
`Podzial` = c(rep("Training",nrow(train)),rep("Test",nrow(test)),rep("Validation",nrow(valid))),
`Pred` = y_mixx_pred[,2],
`PredClass` = ifelse(y_mixx_pred[,2] >= wybrany_cutoff, "Cancer", "Control")
)
fwrite(mix2, paste0(temp_dir,"/models/keras",model_id,"/data_predictions.csv.gz"))
}
fwrite(cbind(hyperparameters[i,], tempwyniki), paste0(temp_dir,"/models/keras",model_id,"/wyniki.csv"))
wagi = get_weights(dnn_class_model)
saveRDS(wagi, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.RDS"))
save_model_weights_hdf5(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel_weights.hdf5"))
saveRDS(dnn_class_model, paste0(temp_dir,"/models/keras",model_id,"/finalmodel.RDS"))
#message("Checkpoint passed: chunk 36")
# is the model worth saving?
cat(paste0("\n ",model_id, ": ", tempwyniki[1, "training_Accuracy"], " / ", tempwyniki[1, "test_Accuracy"], " ==> ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc))
if(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc) {
# Save the model to the proper (permanent) directory
#message("Checkpoint passed: chunk 37e")
if (save_all_vars) { save(list = ls(all=TRUE), file = paste0(temp_dir,"/models/keras",model_id,"/all.Rdata.gz"), compress = "gzip", compression_level = 9) }
#message("Checkpoint passed: chunk 37d")
if(!dir.exists(paste0("models/",codename,"/"))) { dir.create(paste0("models/",codename,"/")) }
#message("Checkpoint passed: chunk 37c")
if(dir.exists("/OmicSelector")) {
system(paste0("/bin/bash -c 'screen -dmS save_network zip -9 -r ", paste0("models/",codename,"/",codename, "_", model_id,".zip") ," ", paste0(temp_dir,"models/keras",model_id), "/'"))
} else {
zip(paste0("models/",codename,"/",codename, "_", model_id,".zip"),list.files(paste0(temp_dir,"models/keras",model_id), full.names = T, recursive = T, include.dirs = T)) }
#message("Checkpoint passed: chunk 37b")
if (!dir.exists(paste0("temp/",codename,"/"))) { dir.create(paste0("temp/",codename,"/")) }
#message("Checkpoint passed: chunk 37a")
file.copy(list.files(paste0(temp_dir,"/models/keras",model_id), pattern = "wyniki.csv$", full.names = T, recursive = T, include.dirs = T),paste0("models/",codename,"/",model_id,"_deeplearningresults.csv"))
#message("Checkpoint passed: chunk 37")
} }
sink()
sink(type="message")
message(paste0("OmicSelector: Finished training network id: ",model_id, " : training_acc: ", tempwyniki[1, "training_Accuracy"], ", testing_acc: ", tempwyniki[1, "test_Accuracy"], " ==> worth_saving: ", tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc))
#dev.off()
tempwyniki2 = cbind(hyperparameters[i,],tempwyniki)
tempwyniki2[1,"name"] = paste0(codename,"_", model_id)
tempwyniki2[1,"worth_saving"] = as.character(tempwyniki[1, "training_Accuracy"]>save_threshold_trainacc & tempwyniki[1, "test_Accuracy"]>save_threshold_testacc)
end_time <- Sys.time()
tempwyniki2[1,"training_time"] = as.character(end_time - start_time)
tempwyniki2
}
saveRDS(final, paste0(output_file,".RDS"))
cat("\nAll done!! Ending..\n")
if (file.exists(output_file)) {
tempfi = fread(output_file)
final = rbind(tempfi, final) }
fwrite(final, output_file)
setwd(oldwd)
#options(warn=0)
# Cleanup of temporary files
if(clean_temp_files) {
keras::k_clear_session()
unlink(paste0(normalizePath(temp_dir), "/", dir(temp_dir)), recursive = TRUE)
}
}
#' OmicSelector_transfer_learning_neural_network()
#'
#' This function performs simple transfer learning on a network created by the training process above, optionally freezing the weights of particular layers before recalibration.
#'
#' @param selected_miRNAs Character vector of features to use; the default "." selects all columns starting with "hsa".
#' @param new_scaling Whether to fit new scaling parameters on the new training set for scaled models (recommended for recalibration procedures). If set to FALSE, you should set old_train_csv_to_restore_scaling, which is used to recreate the initial scaling.
#' @param model_path Path to the zip file containing the initial model for transfer learning.
#' @param save_scaling Should the scaling parameters be saved for further reference? Default: TRUE.
#' @param old_train_csv_to_restore_scaling Path to the original training CSV, used to regenerate the initial scaling when new_scaling is FALSE and no scaling parameters were saved with the model.
#' @param freeze_from First layer to freeze (the `from` argument of https://keras.rstudio.com/reference/freeze_weights.html). If set to 0 or below, freezing is skipped.
#' @param freeze_to Last layer to freeze (the `to` argument of https://keras.rstudio.com/reference/freeze_weights.html).
#' @param train Training set (data frame with a Class column and feature columns).
#' @param test Test set.
#' @param valid Validation set.
#'
#' @return Results of the uncalibrated ("crude") and recalibrated models.
#'
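#' @examples
#' \dontrun{
#' # Minimal usage sketch: the archive and CSV paths below are placeholders taken from the defaults
#' # and must point to an existing model zip and to your own train/test/validation splits.
#' res = OmicSelector_transfer_learning_neural_network(
#'   selected_miRNAs = ".",
#'   model_path = "tcga_models/pancreatic_tcga_165592-1601307872.zip",
#'   freeze_from = 1, freeze_to = 2,
#'   train = data.table::fread("circ_data/mixed_train.csv"),
#'   test = data.table::fread("circ_data/mixed_test.csv"),
#'   valid = data.table::fread("circ_data/mixed_valid.csv"))
#' res # row 1: crude (uncalibrated) model, row 2: recalibrated transfer model
#' }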
#' @export
OmicSelector_transfer_learning_neural_network = function(selected_miRNAs = ".", new_scaling = TRUE, model_path = "tcga_models/pancreatic_tcga_165592-1601307872.zip", save_scaling = TRUE,
old_train_csv_to_restore_scaling = "../tcga/mixed_train.csv" # used when new_scaling is FALSE and no scaling parameters were saved with the model; regenerates the scaling based on the previous (original) training set.
, freeze_from = 1
, freeze_to = 2,
train = fread("circ_data/mixed_train.csv"),
test = fread("circ_data/mixed_test.csv"),
valid = fread("circ_data/mixed_valid.csv")) {
tempwyniki = data.frame()
library(keras)
# library(OmicSelector)
# OmicSelector_load_extension("deeplearning")
library(data.table)
x_train <- train %>%
{ if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>%
as.matrix()
y_train <- train %>%
dplyr::select("Class") %>%
as.matrix()
y_train[,1] = ifelse(y_train[,1] == "Cancer",1,0)
x_test <- test %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_test <- test %>%
dplyr::select("Class") %>%
as.matrix()
y_test[,1] = ifelse(y_test[,1] == "Cancer",1,0)
x_valid <- valid %>%
{ if (selected_miRNAs != ".") { dplyr::select(.,selected_miRNAs) } else { dplyr::select(.,starts_with("hsa")) } } %>%
as.matrix()
y_valid <- valid %>%
dplyr::select("Class") %>%
as.matrix()
y_valid[,1] = ifelse(y_valid[,1] == "Cancer",1,0)
#####
# INIT MODEL
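# Locate the saved .hdf5 model and its wyniki.csv configuration file inside the provided zip archive
# and extract them to a temporary directory.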
model_path_in_zip = dplyr::filter(unzip(model_path, list = T), grepl("finalmodel.hdf5",Name))[1,"Name"]
unzip(model_path, model_path_in_zip, exdir = tempdir())
model_path_unzipped = paste0(tempdir(), "/", model_path_in_zip)
init_model = keras::load_model_hdf5(model_path_unzipped)
init_model
wagi_wyjsciowe = get_weights(init_model)
wyniki_path_in_zip = dplyr::filter(unzip(model_path, list = T), grepl("wyniki.csv",Name))[1,"Name"]
unzip(model_path, wyniki_path_in_zip, exdir = tempdir())
wyniki_path_unzipped = paste0(tempdir(), "/", wyniki_path_in_zip)
pre_conf = data.table::fread(wyniki_path_unzipped)
#
# If scaled
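# If the original model was trained on scaled inputs, the new data must be scaled consistently:
# either fit new center/scale parameters on the new training set (new_scaling = TRUE) or recreate
# the original parameters from old_train_csv_to_restore_scaling.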
if(as.logical(pre_conf[1,"scaled"])) {
if(new_scaling) {
x_train_scale = x_train %>% scale()
col_mean_train <- attr(x_train_scale, "scaled:center")
col_sd_train <- attr(x_train_scale, "scaled:scale")
saveRDS(col_mean_train, "transfer_col_mean_train.RDS")
saveRDS(col_sd_train, "transfer_col_sd_train.RDS")
x_test_scale <- x_test %>%
scale(center = col_mean_train,
scale = col_sd_train)
x_valid_scale <- x_valid %>%
scale(center = col_mean_train,
scale = col_sd_train)
} else {
tcga_train = fread(old_train_csv_to_restore_scaling)
tcga_train <- tcga_train %>%
{ if (selected_miRNAs != ".") { dplyr::select(., selected_miRNAs) } else { dplyr::select(., starts_with("hsa")) } } %>%
as.matrix() %>% scale()
col_mean_train <- attr(tcga_train, "scaled:center")
col_sd_train <- attr(tcga_train, "scaled:scale")
saveRDS(col_mean_train, "transfer_col_mean_train.RDS")
saveRDS(col_sd_train, "transfer_col_sd_train.RDS")
x_train_scale <- x_train %>%
scale(center = col_mean_train,
scale = col_sd_train)
x_test_scale <- x_test %>%
scale(center = col_mean_train,
scale = col_sd_train)
x_valid_scale <- x_valid %>%
scale(center = col_mean_train,
scale = col_sd_train)
}
} else {
x_train_scale <- x_train
x_test_scale <- x_test
x_valid_scale <- x_valid
}
preds = predict(init_model, x_train_scale)[,2]
library(pROC)
# Choose the probability cutoff
pred = data.frame(`Class` = as.factor(ifelse(y_train==1,"Cancer","Control")), `Pred` = predict(init_model, x_train_scale))
library(cutpointr)
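# Youden's J statistic (sensitivity + specificity - 1) is maximised over candidate cutoffs
# on the training predictions to pick the classification threshold.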
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden)
print(summary(cutoff))
ggplot2::ggsave(file = paste0("cutoff.png"), plot(cutoff))
wybrany_cutoff = cutoff$optimal_cutpoint
y_train_pred <- predict(object = init_model, x = x_train_scale)
y_test_pred <- predict(object = init_model, x = x_test_scale)
y_valid_pred <- predict(object = init_model, x = x_valid_scale)
#message("Checkpoint passed: chunk 29")
#wyniki[i, "training_AUC"] = cutoff$AUC
tempwyniki[1, "new_training_AUC"] = cutoff$AUC
#wyniki[i, "cutoff"] = wybrany_cutoff
tempwyniki[1, "new_cutoff"] = wybrany_cutoff
#message("Checkpoint passed: chunk 30")
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n"))
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_train)
#message("Checkpoint passed: chunk 31")
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred)
tempwyniki[1, "new_training_AUC2"] = t1_roc$auc
tempwyniki[1, "new_training_AUC_lower95CI"] = as.character(ci(t1_roc))[1]
tempwyniki[1, "new_training_AUC_upper95CI"] = as.character(ci(t1_roc))[3]
saveRDS(t1_roc, paste0("init_training_ROC.RDS"))
#ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve"))
#message("Checkpoint passed: chunk 32")
tempwyniki[1, "new_training_Accuracy"] = cm_train$overall[1]
tempwyniki[1, "new_training_Sensitivity"] = cm_train$byClass[1]
tempwyniki[1, "new_training_Specificity"] = cm_train$byClass[2]
tempwyniki[1, "new_training_PPV"] = cm_train$byClass[3]
tempwyniki[1, "new_training_NPV"] = cm_train$byClass[4]
tempwyniki[1, "new_training_F1"] = cm_train$byClass[7]
saveRDS(cm_train, paste0("init_cm_train.RDS"))
#message("Checkpoint passed: chunk 33")
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = as.factor(test$Class), `Pred` = y_test_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_test)
tempwyniki[1, "new_test_Accuracy"] = cm_test$overall[1]
tempwyniki[1, "new_test_Sensitivity"] = cm_test$byClass[1]
tempwyniki[1, "new_test_Specificity"] = cm_test$byClass[2]
tempwyniki[1, "new_test_PPV"] = cm_test$byClass[3]
tempwyniki[1, "new_test_NPV"] = cm_test$byClass[4]
tempwyniki[1, "new_test_F1"] = cm_test$byClass[7]
saveRDS(cm_test, paste0("init_cm_test.RDS"))
#message("Checkpoint passed: chunk 34")
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = as.factor(valid$Class), `Pred` = y_valid_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_valid)
tempwyniki[1, "new_valid_Accuracy"] = cm_valid$overall[1]
tempwyniki[1, "new_valid_Sensitivity"] = cm_valid$byClass[1]
tempwyniki[1, "new_valid_Specificity"] = cm_valid$byClass[2]
tempwyniki[1, "new_valid_PPV"] = cm_valid$byClass[3]
tempwyniki[1, "new_valid_NPV"] = cm_valid$byClass[4]
tempwyniki[1, "new_valid_F1"] = cm_valid$byClass[7]
saveRDS(cm_valid, paste0("init_cm_valid.RDS"))
#message("Checkpoint passed: chunk 35")
#####
# RECALIBRATION
scenario_i = 2
scenario = "trans1"
trans_model = clone_model(init_model)
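# clone_model() copies the architecture only (with freshly initialised weights); the original weights are
# copied in explicitly with set_weights() below. Early stopping halts recalibration once val_loss has not
# improved for 200 epochs, and the checkpoint callback periodically saves the best (lowest val_loss) model to disk.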
early_stop <- callback_early_stopping(monitor = "val_loss", mode="min", patience = 200)
cp_callback <- callback_model_checkpoint(
filepath = paste0(scenario,"_model.hdf5"),
save_best_only = TRUE, period = 10, monitor = "val_loss",
verbose = 0
)
keras_batch_size = 64
# Transfer options:
compile(trans_model, loss = 'binary_crossentropy',
metrics = 'accuracy', optimizer = pre_conf$optimizer)
set_weights(trans_model, get_weights(init_model))
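# freeze_weights() marks the selected layers as non-trainable; the model has to be recompiled afterwards
# for the change to take effect, hence the second compile() call.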
if(freeze_from>0) {
freeze_weights(trans_model, from = freeze_from, to = freeze_to)
compile(trans_model, loss = 'binary_crossentropy',
metrics = 'accuracy', optimizer = pre_conf$optimizer) }
# Recalibration:
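# to_categorical() one-hot encodes the 0/1 class labels into two columns to match the model's two-column output.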
history <- fit(trans_model, x = x_train_scale,
y = to_categorical(y_train),
epochs = 5000,
validation_data = list(x_test_scale, to_categorical(y_test)),
callbacks = list(
cp_callback,
#callback_reduce_lr_on_plateau(monitor = "val_loss", factor = 0.1),
#callback_model_checkpoint(paste0(temp_dir,"/models/keras",model_id,"/finalmodel.hdf5")),
early_stop),
verbose = 0,
view_metrics = FALSE,
batch_size = keras_batch_size, shuffle = T)
history
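# Printing the weights of both models allows a quick visual check of which layers changed during recalibration
# and which stayed frozen.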
get_weights(init_model)
get_weights(trans_model)
# Evaluation after recalibration:
preds = predict(trans_model, x_train_scale)[,2]
library(pROC)
# Choose the probability cutoff
pred = data.frame(`Class` = as.factor(ifelse(y_train==1,"Cancer","Control")), `Pred` = predict(trans_model, x_train_scale))
library(cutpointr)
cutoff = cutpointr(pred, Pred.2, Class, pos_class = "Cancer", metric = youden)
print(summary(cutoff))
ggplot2::ggsave(file = paste0("trans_cutoff.png"), plot(cutoff))
wybrany_cutoff = cutoff$optimal_cutpoint
y_train_pred <- predict(object = trans_model, x = x_train_scale)
y_test_pred <- predict(object = trans_model, x = x_test_scale)
y_valid_pred <- predict(object = trans_model, x = x_valid_scale)
#message("Checkpoint passed: chunk 29")
#wyniki[i, "training_AUC"] = cutoff$AUC
tempwyniki[scenario_i, "new_training_AUC"] = cutoff$AUC
#wyniki[i, "cutoff"] = wybrany_cutoff
tempwyniki[scenario_i, "new_cutoff"] = wybrany_cutoff
#message("Checkpoint passed: chunk 30")
cat(paste0("\n\n---- TRAINING PERFORMANCE ----\n\n"))
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_train = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_train)
#message("Checkpoint passed: chunk 31")
t1_roc = pROC::roc(Class ~ as.numeric(Pred.2), data=pred)
tempwyniki[scenario_i, "new_training_AUC2"] = t1_roc$auc
tempwyniki[scenario_i, "new_training_AUC_lower95CI"] = as.character(ci(t1_roc))[1]
tempwyniki[scenario_i, "new_training_AUC_upper95CI"] = as.character(ci(t1_roc))[3]
saveRDS(t1_roc, paste0("new_trans_training_ROC.RDS"))
#ggplot2::ggsave(file = paste0(temp_dir,"/models/keras",model_id,"/training_ROC.png"), grid.arrange(plot(t1_roc), nrow =1, top = "Training ROC curve"))
#message("Checkpoint passed: chunk 32")
tempwyniki[scenario_i, "new_training_Accuracy"] = cm_train$overall[1]
tempwyniki[scenario_i, "new_training_Sensitivity"] = cm_train$byClass[1]
tempwyniki[scenario_i, "new_training_Specificity"] = cm_train$byClass[2]
tempwyniki[scenario_i, "new_training_PPV"] = cm_train$byClass[3]
tempwyniki[scenario_i, "new_training_NPV"] = cm_train$byClass[4]
tempwyniki[scenario_i, "new_training_F1"] = cm_train$byClass[7]
saveRDS(cm_train, paste0("trans_cm_train.RDS"))
#message("Checkpoint passed: chunk 33")
cat(paste0("\n\n---- TESTING PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = as.factor(test$Class), `Pred` = y_test_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_test = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_test)
tempwyniki[scenario_i, "new_test_Accuracy"] = cm_test$overall[1]
tempwyniki[scenario_i, "new_test_Sensitivity"] = cm_test$byClass[1]
tempwyniki[scenario_i, "new_test_Specificity"] = cm_test$byClass[2]
tempwyniki[scenario_i, "new_test_PPV"] = cm_test$byClass[3]
tempwyniki[scenario_i, "new_test_NPV"] = cm_test$byClass[4]
tempwyniki[scenario_i, "new_test_F1"] = cm_test$byClass[7]
saveRDS(cm_test, paste0("trans_cm_test.RDS"))
#message("Checkpoint passed: chunk 34")
cat(paste0("\n\n---- VALIDATION PERFORMANCE ----\n\n"))
pred = data.frame(`Class` = as.factor(valid$Class), `Pred` = y_valid_pred)
pred$PredClass = ifelse(pred$Pred.2 >= wybrany_cutoff, "Cancer", "Control")
pred$PredClass = factor(pred$PredClass, levels = c("Control","Cancer"))
cm_valid = caret::confusionMatrix(pred$PredClass, pred$Class, positive = "Cancer")
print(cm_valid)
tempwyniki[scenario_i, "new_valid_Accuracy"] = cm_valid$overall[1]
tempwyniki[scenario_i, "new_valid_Sensitivity"] = cm_valid$byClass[1]
tempwyniki[scenario_i, "new_valid_Specificity"] = cm_valid$byClass[2]
tempwyniki[scenario_i, "new_valid_PPV"] = cm_valid$byClass[3]
tempwyniki[scenario_i, "new_valid_NPV"] = cm_valid$byClass[4]
tempwyniki[scenario_i, "new_valid_F1"] = cm_valid$byClass[7]
saveRDS(cm_valid, paste0("trans_cm_valid.RDS"))
#message("Checkpoint passed: chunk 35")
resu = cbind(pre_conf, tempwyniki)
resu$type = c("crude","recalibrated")
resu$based_on = basename(model_path)
resu$based_on_path = model_path
new_name = make.names(paste0("transfer-",as.numeric(Sys.time())))
resu$new_model_name = c("",new_name)
fwrite(resu, "resu.csv")
zip(zipfile = paste0(new_name,".zip"), files = c(list.files(".", pattern = ".hdf5"), list.files(".", pattern = ".csv"), list.files(".", pattern = ".RDS"), list.files(".", pattern = ".png")))
unlink(c(list.files(".", pattern = ".hdf5"), list.files(".", pattern = ".csv"), list.files(".", pattern = ".RDS"), list.files(".", pattern = ".png")))
resu
}
#!/bin/bash
until Rscript 02_deeplearning.R; do
echo "Wywalil sie... powtarzam.."
sleep 10
done