Create a gist now

Instantly share code, notes, and snippets.

What would you like to do?
Instructions: first download the Marks et al. data from their supplement and
format it as shown in the example below. Then save this file as an .Rnw file,
run Stangle on it, and source the resulting .R file.
Import data from Marks et al. Cell 2012.
<<>>=
# Read the formatted Marks et al. table.
# header = TRUE is given explicitly: read.table() only auto-detects a header
# when the first line has one field fewer than the data lines (i.e. when the
# file carries a row-name column, as in the example below). A file saved
# without row names would otherwise have its header read as data, and
# dat$GeneSymbol would silently be NULL.
dat <- read.table('./marks2i.serum.example.txt', header = TRUE)
# > head(dat)
# GeneSymbol log2Fold pval
# 1 0610007C21Rik -1.0501235 0.02
# 2 0610007L01Rik 0.3659197 0.11
# 3 0610007P08Rik 0.4903256 0.23
# 4 0610007P14Rik -0.2574317 0.20
# 5 0610007P22Rik -0.3934814 0.19
# 6 0610009B22Rik 0.2751580 0.07
# Fail here, rather than in a later chunk, if the file is not in the expected
# three-column format.
stopifnot(all(c('GeneSymbol', 'log2Fold', 'pval') %in% names(dat)))
# Gene universe: every gene symbol present in the table, as character.
all.genes.universe <- as.character(dat$GeneSymbol)
@
Get the GO annotation list from the Bioconductor-curated database:
<<>>=
# library() instead of require(): require() only warns and returns FALSE when
# a package is missing, which defers the failure to a less obvious error
# further down; library() stops immediately.
library(goseq)
# replace line below with the org database for your genome of interest
library(org.Mm.eg.db)
# Fetch the GO biological-process ('GO:BP') annotations for every gene symbol
# in the universe, using the mm9 genome build. Per the goseq documentation the
# result is a list with one entry per gene holding that gene's GO category IDs.
goannotlist <- goseq::getgo( all.genes.universe, 'mm9', 'geneSymbol',
fetch.cats = 'GO:BP' )
@
Turn the list into a two-column data frame (actually a data.table):
<<>>=
# library() instead of require() so a missing package stops the script here.
# plyr is no longer needed by f.golist.to.godf itself; the load is kept in
# case other chunks of this document rely on it.
library(plyr)
library(data.table)

# Flatten a gene -> GO-category list into a long two-column data.table.
#
# Args:
#   goannot: a named list; each element is a vector of category IDs for the
#            gene named by that element. Elements may be NULL or empty; they
#            contribute no rows.
#
# Returns:
#   A data.table with columns GeneSymbol and Category, one row per
#   (gene, category) pair, in the order the pairs appear in `goannot`.
#   An empty or all-empty input gives a zero-row table with both columns.
#
# Raises:
#   An error if `goannot` has annotations but no names, because the gene
#   symbols are taken from the list names.
f.golist.to.godf <- function(goannot){
  # Number of categories per gene (0 for NULL / empty elements).
  n.per.gene <- vapply(goannot, length, integer(1), USE.NAMES = FALSE)

  # Nothing to flatten: return a well-formed empty table so that downstream
  # code can still refer to both columns.
  if (sum(n.per.gene) == 0L) {
    return(data.table(data.frame(GeneSymbol = character(0),
                                 Category = character(0))))
  }
  if (is.null(names(goannot))) {
    stop("'goannot' must be a named list (names are the gene symbols)",
         call. = FALSE)
  }

  # unlist() drops empty elements, so repeating each name by its element
  # length keeps the two columns aligned.
  gonames <- unlist(goannot, use.names = FALSE)
  genenames <- rep(names(goannot), n.per.gene)

  # Built through data.frame() as before, so column types follow the same
  # data.frame() defaults as the previous implementation.
  goannotlong <- data.frame(GeneSymbol = genenames, Category = gonames)
  data.table(goannotlong)
}
goannotlong <- f.golist.to.godf( goannotlist )
@
Apply criteria used by Marks et al. for differential expression.
<<>>=
# Genes passing the differential-expression cut-offs: more than one log2 unit
# of change in either direction, with pval below 0.2.
# which() discards rows whose condition evaluates to NA, matching subset().
genes.high2i <- with(dat, GeneSymbol[which(pval < 0.2 & log2Fold < -1)])
genes.highserum <- with(dat, GeneSymbol[which(pval < 0.2 & log2Fold > 1)])
@
<<>>=
# Build one GO table per gene set with identical settings, so the two results
# are directly comparable.
# NOTE(review): f.makegreedyGOtable is not defined in the visible part of this
# document; it has to be defined or sourced before this chunk runs.
# NOTE(review): gosizecutoff = 300 presumably limits the size of GO categories
# considered and abbrlength = 50 presumably truncates category labels --
# confirm against the helper's definition.

# GO table for the genes selected with log2Fold > 1.
go.highserum <- f.makegreedyGOtable( genehits = genes.highserum,
gotable = goannotlong, gosizecutoff = 300, abbrlength = 50)
# GO table for the genes selected with log2Fold < -1.
go.high2i <- f.makegreedyGOtable( genehits = genes.high2i,
gotable = goannotlong, gosizecutoff = 300, abbrlength = 50)
@
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment