Skip to content

Instantly share code, notes, and snippets.

@ipurusho
Last active September 22, 2017 10:22
Show Gist options
  • Save ipurusho/53eab4d68d9744a3fbdc to your computer and use it in GitHub Desktop.
Save ipurusho/53eab4d68d9744a3fbdc to your computer and use it in GitHub Desktop.
Impute missing values and run a t-test on proteomic data
# Impute missing values in a proteomic intensity table and run a per-protein
# two-sample t-test between two conditions.
#
# Rows (proteins) are split by how many of their values are present:
#   * at least half of the samples observed -> missing values are imputed with
#     impute::impute.knn() (default parameters) and the t-test is run once;
#   * fewer than half observed -> missing values are replaced by random draws
#     from N(mean = m, sd = sqrt(m)), where m is the smallest observed value in
#     the whole table; the t-test is repeated `n_iter` times with fresh draws
#     and the resulting p-values are averaged.
#
# Arguments:
#   intensities   numeric matrix or data frame, one row per protein and one
#                 column per sample; missing values are NA/NaN.
#   meta_data     vector of condition labels, one per column of `intensities`.
#   condA, condB  labels of the two conditions to compare. Each is anchored
#                 as "^label$" and matched against `meta_data` as a regex.
#   n_iter        number of random min-value imputations to average over
#                 (default 10, the value that was previously hard-coded).
#
# Returns:
#   A one-column numeric matrix of p-values with the protein names as row
#   names; kNN-imputed proteins come first, then min-value-imputed proteins.
#
# Side effects:
#   Re-seeds the global RNG from the clock and prints the seed that was used.
analyze.proteomic.data <- function(intensities, meta_data, condA, condB,
                                   n_iter = 10) {
  values <- as.matrix(intensities)
  if (!is.numeric(values)) {
    stop("`intensities` must contain only numeric values", call. = FALSE)
  }
  if (length(meta_data) != ncol(values)) {
    stop("`meta_data` must have one label per column of `intensities`",
         call. = FALSE)
  }
  if (length(n_iter) != 1L || is.na(n_iter) || n_iter < 1) {
    stop("`n_iter` must be a single positive number", call. = FALSE)
  }
  n_iter <- as.integer(n_iter)

  # Results are keyed by row name, so make sure every row has one.
  if (is.null(rownames(values))) {
    rownames(values) <- seq_len(nrow(values))
  }

  # Locate the replicate columns of each condition (exact, anchored match).
  labels <- as.character(meta_data)
  colnames(values) <- labels
  condA.indices <- grep(paste0("^", condA, "$"), labels)
  condB.indices <- grep(paste0("^", condB, "$"), labels)
  if (length(condA.indices) < 2L || length(condB.indices) < 2L) {
    stop("each condition needs at least two replicate columns for a t-test",
         call. = FALSE)
  }

  # p-value of the two-sample t-test for one protein (one row of the table).
  row_p_value <- function(row) {
    t.test(x = row[condA.indices], y = row[condB.indices])$p.value
  }

  # Split proteins by the number of observed values. is.na() catches both NA
  # and NaN. Rows with exactly half of the values present go to kNN, so no
  # protein is dropped.
  observed <- rowSums(!is.na(values))
  use_knn <- observed >= ncol(values) / 2
  use_min <- !use_knn

  # --- kNN imputation: a single deterministic t-test per protein ----------
  knn_p <- numeric(0)
  if (any(use_knn)) {
    if (!requireNamespace("impute", quietly = TRUE)) {
      stop("package 'impute' is required for k-nearest-neighbour imputation",
           call. = FALSE)
    }
    knn_data <- impute::impute.knn(values[use_knn, , drop = FALSE])$data
    knn_p <- apply(knn_data, 1, row_p_value)
  }

  # impute.knn() sets the RNG seed itself; re-seed from the fractional part of
  # the current time so that the random draws below differ between calls.
  now <- as.numeric(Sys.time())
  seed <- (now - floor(now)) * 1e8
  set.seed(seed)
  print(seed)

  # --- min-value imputation: average p-values over n_iter random fills ----
  min_p <- numeric(0)
  if (any(use_min)) {
    min_data <- values[use_min, , drop = FALSE]
    missing <- is.na(min_data)
    n_missing <- sum(missing)
    # NOTE(review): sd = sqrt(min) assumes the smallest observed value is
    # non-negative (e.g. raw rather than log-ratio data) -- confirm.
    floor_value <- min(values, na.rm = TRUE)

    # One column per iteration, one row per protein, so that rowMeans()
    # averages the p-values belonging to the same protein.
    p_by_iter <- matrix(NA_real_, nrow = nrow(min_data), ncol = n_iter)
    for (i in seq_len(n_iter)) {
      filled <- min_data
      filled[missing] <- rnorm(n_missing, mean = floor_value,
                               sd = sqrt(floor_value))
      p_by_iter[, i] <- apply(filled, 1, row_p_value)
    }
    min_p <- rowMeans(p_by_iter)
    names(min_p) <- rownames(min_data)
  }

  pvals <- c(knn_p, min_p)
  matrix(pvals, ncol = 1, dimnames = list(names(pvals), NULL))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment