Last active
April 7, 2022 13:25
-
-
Save kstawiski/65bfbee52349c8b9790819c2cbc4ae3a to your computer and use it in GitHub Desktop.
Sample size estmation for circulating miRNA biomarker study based on nonlinear weighted least squares optimization.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This code is a part of paper entitled: "Deep learning-based integration of circulating and cellular miRNAs expression of pancreatic cancer" | |
# Author: Konrad Stawiski, MD, PhD (konrad@konsta.com.pl; https://konsta.com.pl) | |
# Based on https://bmcmedinformdecismak.biomedcentral.com/articles/10.1186/1472-6947-12-8 | |
library(dplyr) | |
setwd("C:/Users/konra/OneDrive/Doktorat/Analiza/Szacowanie") | |
set.seed(1) | |
# Get exemplary file | |
Elias2017 <- data.table::fread("Elias2017.csv") | |
Y = as.factor(Elias2017$Class) # Class = Case or Control | |
X = as.matrix(dplyr::select(Elias2017, starts_with("hsa"))) # select all miRNAs | |
temp = dplyr::select(Elias2017, starts_with("hsa"), Class) | |
temp$Class = as.factor(temp$Class) | |
# Modifications start here | |
library(doParallel) | |
cl <- makePSOCKcluster(parallel::detectCores()-1) | |
registerDoParallel(cl) | |
library(caret) | |
library(nnet) | |
trControl = trainControl(method = "repeatedcv", | |
number = 10, | |
repeats = 5, | |
classProbs = TRUE, | |
summaryFunction = twoClassSummary, | |
search = "random") | |
wyniki = data.frame(`i` = numeric(), `meanAUC` = numeric(), `lower` = numeric(), `upper` = numeric()) | |
for(i in 20:nrow(temp)){ | |
try({ | |
cat(i) | |
tempindex = sample(1:nrow(temp),i) | |
tempmodel = train(Class ~ ., data = temp[tempindex,], method = "nnet", family = "binomial", trControl = trControl, tuneLength=500) | |
m = mean(tempmodel$results$ROC, na.rm = T) | |
t = t.test(tempmodel$results$ROC, na.rm = T) | |
lower = t$conf.int[1] | |
upper = t$conf.int[2] | |
wyniki = rbind(wyniki, c(i,m,lower,upper)) }) | |
} | |
colnames(wyniki) = c("i", "meanROC", "lower", "upper") | |
data.table::fwrite(wyniki,paste0("symulacja",as.numeric(Sys.time()),".csv")) | |
p<- ggplot(wyniki, aes(x=i, y=meanROC)) + | |
geom_line() + | |
geom_point()+ | |
geom_errorbar(aes(ymin=lower, ymax=upper), width=.2, | |
position=position_dodge(0.05)) | |
# Finished line plot | |
p+labs(title="Samples", x="Number of miRNAs", y = "mean AUC ROC (5x 10-fold CV)")+ | |
theme_classic() | |
####################### | |
FitModel<- function(offset, X, Y, W, i, N,startParams) | |
{ | |
#Data considering only points between offset and i | |
x<-X[offset:i]; | |
y<-Y[offset:i]; | |
w<-W[offset:i]; | |
gradientF<-deriv3(~(1-a)-(b*(x^c)), c("a","b","c"), function(a,b,c,x) NULL); # fitting the model using nls | |
m<-nls(y~gradientF(a,b,c,x), start = startParams, weights=w, | |
control = list(maxiter=1000, warnOnly = TRUE), | |
algorithm = "port", upper = list(a=10, b = 10, c = -0.1), | |
lower = list(a = 0, b = 0, c=-10), data = data.frame(y=y, x=x)) | |
#predict Y for sample sizes not used to fit the curve | |
#if all data was used to fit model, testing data = training data | |
#else, testing data = (total data - training data) | |
if (i==N){ | |
testX<-X[(offset:i)]; | |
testY<-Y[(offset:i)]; | |
testW <- W[offset:i]; | |
} | |
else{ | |
testX<-X[((i+1):N)]; | |
testY<-Y[((i+1):N)]; | |
testW<-W[((i+1):N)]; | |
} | |
#predictions on unseen data | |
#Get remaining X | |
prediction<-predict(m, list(x=testX)); #confidence intervals | |
se.fit <- sqrt(apply(attr(predict(m, list(x=testX)),"gradient"),1, | |
function(x) sum(vcov(m)*outer(x,x)))); | |
prediction.ci <- prediction + outer(se.fit,qnorm(c(.5, .025,.975))); | |
predictY<-prediction.ci[,1]; | |
predictY.lw<-prediction.ci[,2]; | |
predictY.up<-prediction.ci[,3]; | |
#Calculate residuals | |
if (i==N) | |
else | |
res<-rep(0,length(X)) res<-rep(0,length(X-i)) | |
res<-(predictY-testY); | |
#Calculate Root Mean square error | |
rmseValue<-rmse(testY, predictY); #Calculate Absolute error | |
maeValue<-abse(testY, predictY); | |
} | |
#End function |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment