Skip to content

Instantly share code, notes, and snippets.

@Loyale
Last active December 22, 2015 16:18
Show Gist options
  • Save Loyale/6497871 to your computer and use it in GitHub Desktop.
Save Loyale/6497871 to your computer and use it in GitHub Desktop.
GSEA from cummeRbund (work in progress)
library(cummeRbund)
library(limma)
library(GSA)
cuff<-readCufflinks()
Input_df<-data.frame("gene_id"=rownames(dat),"gene_short_name"=rownames(dat))
Input_df<-cbind(Input_df,fpkmMatrix(genes(cuff)))
Input_df$gene_short_name<-toupper(Input_df$gene_short_name)
#Input_df should a data.frame of the form:
#"gene_id" "gene_short_name" cond_1 cond_2 cond_3 ...
gene_set_index <- function(genelist, short_names)
{
which(short_names %in% genelist)
}
ztest<-function(samp,pop){
(mean(samp,na.rm=T)-mean(pop,na.rm=T))/sd(pop,na.rm=T)
}
get_gene_set_p_vals <- function(mon_res_A_df, gs, alternative="mixed")
{
gene_set_indices <- lapply(gs$genesets,
function(x, short_names) { gene_set_index(x, short_names)},
mon_res_A_df$gene_short_name)
pvl <- apply(mon_res_A_df[,-c(1,2)], 2, function(stats) { lapply(gene_set_indices, geneSetTest, stats, alternative=alternative) } )
pvl_mat <- do.call(rbind, lapply(pvl, unlist))
colnames(pvl_mat) <- gs$geneset.names
rownames(pvl_mat) <- colnames(mon_res_A_df)[-c(1,2)]
return(pvl_mat)
}
get_gene_set_ztest <- function(scoring_df, gs){ #Note: scoring df has same structure as mon_res_A_df
gene_set_indices <- lapply(gs$genesets,
function(x, short_names) { gene_set_index(x, short_names)},
scoring_df$gene_short_name)
zscores <- apply(scoring_df[,-c(1,2)],2, function(mat_col){
lapply(gene_set_indices,function(gsi){
ztest(mat_col[gsi],mat_col)
})
})
zscore_mat<-do.call(rbind,lapply(zscores,unlist))
colnames(zscore_mat)<- gs$geneset.names
rownames(zscore_mat) <- colnames(scoring_df[,-c(1,2)])
return(zscore_mat)
}
get_gene_set_q_vals <- function(pvl_mat, method="bonferroni")
{
comp_corrected <- matrix(p.adjust(pvl_mat, method=method), nrow=nrow(pvl_mat), ncol=ncol(pvl_mat))
# gsea_q_corrected <- apply(pvl_mat, 2, p.adjust)
# comp_corrected <- t(apply(gsea_q_corrected, 1, p.adjust))
#
colnames(comp_corrected) <- colnames(pvl_mat)
rownames(comp_corrected) <- rownames(pvl_mat)
return(comp_corrected)
}
reactome_gs <- GSA.read.gmt("c2.cp.reactome.v3.1.symbols.gmt")
biocarta_gs <- GSA.read.gmt("c2.cp.biocarta.v3.1.symbols.gmt")
either_pvl_mat <- get_gene_set_p_vals(Input_df, reactome_gs, alternative="either")
either_pvl_bh <- get_gene_set_q_vals(either_pvl_mat)
biocarta_pvl_mat <- get_gene_set_p_vals(Input_df, biocarta_gs, alternative="either")
biocarta_pvl_bh <- get_gene_set_q_vals(biocarta_pvl_mat)
colMins<-function(x){
apply(x,2,min)
}
rowMins<-function(x){
apply(x,1,min)
}
InputCols<-maPalette(low="white",high="red",k=100)
pdf("components_GSEA_reactome.pdf",width=10,height=20)
heatmap.2(-log10(t(either_pvl_bh[component.order,which(colMins(either_pvl_bh) < 0.001)])), trace="none", margins=c(5,30),col=InputCols,dendrogram="both",lhei = c(0.1,0.90))
dev.off()
pdf("components_GSEA_biocarta.pdf",width=10,height=10)
heatmap.2(-log10(t(biocarta_pvl_bh[component.order,which(colMins(biocarta_pvl_bh) < 0.01)])), trace="none", margins=c(5,30),col=InputCols,dendrogram="both",lhei = c(0.1,0.90))
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment