Skip to content

Instantly share code, notes, and snippets.

@ipurusho
Last active September 28, 2016 15:09
Show Gist options
  • Save ipurusho/64d6dbf1a32aa7c83f665785155e494b to your computer and use it in GitHub Desktop.
Save ipurusho/64d6dbf1a32aa7c83f665785155e494b to your computer and use it in GitHub Desktop.
Functions to help determine the contribution of covariates, weighted by the percent contribution of principal components
top.var<-function (x, ntop = 500)
{
require("genefilter")
rv = rowVars(x)
select = order(rv, decreasing = TRUE)[seq_len(ntop)]
topvar = x[select, ]
return(topvar)
}
get.pc <-function(topvar){
scaled<-t(scale(t(topvar),scale=F))
PCs<- prcomp(scaled)
PCvals <- PCs$rotation
return(PCvals)
}
get.pc.percents <-function(topvar){
scaled<-t(scale(t(topvar),scale=F))
PCs<- prcomp(scaled)
pc.percents <- signif(summary(PCs)$importance[2,])
return(pc.percents)
}
evaluate.covariates<-function(x,pc.percents,continuous,categorical){
covariate.contribution<-function(x,continuous,categorical){
#asinh transform continuous covariates
asinh.continuous <- lapply(continuous,asinh)
asinh.continuous <- as.data.frame(do.call(cbind,asinh.continuous))
#discretize cateogorical covariates to perform lm
disc.categorical <- lapply(lapply(categorical,as.numeric),function(x){x-1})
disc.categorical <- do.call(cbind,disc.categorical)
# #combine x,continuous, categorical as data frome
#for lmFit usage
lm.data <- cbind(x,asinh.continuous,disc.categorical)
cov.names <- c(colnames(continuous),colnames(categorical))
#loop through dimensions and perform lm
#on lm.data object
r.squared.values <- list()
for(i in 1:ncol(x)){
r.squared.values[[i]] <- unlist(lapply(lm.data[,cov.names],function(covariate) summary(lm(lm.data[,i] ~ covariate))$r.squared))
}
r.squared.values <- do.call(cbind,r.squared.values)
colnames(r.squared.values)<-colnames(x)
r.squared.values
}
r.squared.values <- covariate.contribution(x,continuous,categorical)
contribution<-function(x,r.squared.values,pc.percents){
require(plyr)
require(reshape2)
weighted.contribution<-sweep(r.squared.values, MARGIN=2, STATS=pc.percents, FUN='*')
weighted.sum<-rowSums(weighted.contribution)
colnames(r.squared.values)<-colnames(x)
rownames(r.squared.values)<-paste(rownames(r.squared.values),round(weighted.sum,2),sep = " : ")
heatmap.data<-melt(r.squared.values)
colnames(heatmap.data)<-c("Covariate","dim","R2")
heatmap.data
}
return(contribution(x,r.squared.values,pc.percents))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment