Skip to content

Instantly share code, notes, and snippets.

@MatteoLacki
Created May 5, 2020 11:08
Show Gist options
  • Save MatteoLacki/a51446147901b0b008e493bb90908e0d to your computer and use it in GitHub Desktop.
Save MatteoLacki/a51446147901b0b008e493bb90908e0d to your computer and use it in GitHub Desktop.
# hello Ute, long time no see.
library(LFQBench2)
library(data.table)
library(readxl)
library(stringr)
library(rstatix)
library(readr)
# reading in the report
R = read_wide_report('2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx',
skip=1, sheet="TOP3 quantification")
# header = read_excel("2019-107_115 Renauld remeasure_user designed 20200306-122426_quantification_report_UD_sent.xlsx",
# sheet="TOP3 quantification"
# "L1:", col_names = FALSE)
PD = as.data.table(read_csv("projectDetails.csv"))
colnames(PD) = make.names(colnames(PD))
exp = as.data.table(str_split_fixed(PD$replicate_name, ' ', 2))
colnames(exp) = c('state', 'replicate')
PD = cbind(PD, exp)
rm(exp)
PD = PD[state != 'metastasis']
uteTtests = as.data.table(read_csv("ute_t_tests.csv"))
# subselect columns:
normal_cols = grep("normal ", names(R), value = TRUE)
tumor_cols = grep("tumor ", names(R), value = TRUE)
R_info = R[,description:entry]
all(table(R_info$entry) == 1) # entry uniquely describes each protein and so is a key
I_w = R[, c('entry', normal_cols, tumor_cols), with = FALSE]
I = melt(I_w, id.vars='entry', variable.name ='exp', value.name = 'I')
exp = as.data.table(str_split_fixed(I$exp, ' ', 2))
colnames(exp) = c('state', 'replicate')
I = cbind(I, exp)
rm(exp)
I_small = I[entry %in% c('IGK_HUMAN', 'IGLC2_HUMAN', 'IGL1_HUMAN')]
# redoing Ute's analysis
I_small[, .(uteTtest=t.test(I~state, data=.SD)$p.value), entry]
# this should be done with t.test(I~state+entry, data=I_small), but it does not work.
table(I[, uniqueN(state), entry]$V1)
pvalsUte = sapply(split(I, I$entry), function(x) try(t.test(data=x, I~state$pvalue)))
is.error <- function(x) inherits(x, "try-error")
robustTtest = function(data, formula){
r = try(t.test(formula, data), silent=TRUE)
if(is.error(r)) return(-1)
else return(r$p.value)
}
UteTest = function(data, formula){
r = try(t.test(formula, data), silent=TRUE, na.action)
if(is.error(r)){
X = data[,.(NAcnt = sum(is.na(I))), state]
if(any(X$NAcnt > 30)) return(0) else return(1)
}
else return(r$p.value)
}
res = I[,.(uteTtest=UteTest(.SD, I~state)), entry]
res = uteTtests[res,on='entry']
res[, real_ute_t_test := ifelse(real_ute_t_test==9999,1,real_ute_t_test)]
hist(res$real_ute_t_test - res$uteTtest,
breaks=1000,
main='Excel vs R',
xlab='Excel.pvalue - R.pvalue')
res[, uteTtestFDR := p.adjust(uteTtest, 'fdr')]
hist(res$uteTtestFDR, breaks=100, xlab="Ute's T-test FDR adjusted", main='')
I[, exp:=as.character(exp)]
I = merge(I, PD[,.(replicate_name, Patient.No)], by.x = 'exp', by.y='replicate_name')
bigTest = I[, .(t=UteTest(.SD, I~state)), .(entry, Patient.No)]
bigTest[, tFDR:=p.adjust(t, 'fdr')]
bigTest[,`:=`(pat_t = paste0('t_', Patient.No),
pat_t_FDR=paste0('tFDR_', Patient.No) )]
# hist(bigTest[uteTtest <= 1]$uteTtest, breaks=100)
res = res[merge(dcast(bigTest, entry~pat_t, value.var='t'),
dcast(bigTest, entry~pat_t_FDR, value.var='tFDR'),
by='entry'),
on='entry']
res = R_info[res, on='entry']
write.csv(res, file='results.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment