Skip to content

Instantly share code, notes, and snippets.

@stephenturner
Created January 17, 2012 18:15
annotatelimma.r
# Install the necessary bioconductor packages. Use the appropriate annotation.db file
# for your organism/platform. See http://www.bioconductor.org/packages/release/data/annotation/
source("http://www.bioconductor.org/biocLite.R")
biocLite("affy")
biocLite("limma")
biocLite("annotate")
biocLite("hugene10sttranscriptcluster.db")
install.packages("R2HTML")
# Load required packages
library(affy)
library(limma)
library(annotate)
library(hugene10sttranscriptcluster.db)
library(R2HTML)
# RMA to convert Affybatch to ExpressionSet using robust multichip average
eset <- rma(myaffybatch)
class(eset)
# Which platform?
eset@annotation
# Which objects are in this annotation package?
ls("package:hugene10sttranscriptcluster.db")
# Get the transcript cluster IDs from the expressionset
ID <- featureNames(eset)
# Look up the Gene Symbol, name, and Ensembl Gene ID for each of those IDs
Symbol <- getSYMBOL(ID, "hugene10sttranscriptcluster.db")
Name <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "GENENAME"))
Ensembl <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "ENSEMBL"))
# Note: looking up Ensembl gene IDs by Affy probe IDs can sometimes result in multiple
# gene IDs being returned. This is why the default output from lookUp is a list.
# I'm coercing this list into a character vector so it's the same size as my list of IDs,
# But this is discarding information where one probe ID maps to multiple gene IDs.
# If you're aware of a better solution, please leave it in the comments! Thanks.
# Wherever you have a missing Ensembl ID, make hyperlink missing also.
# If you have an Ensembl ID, create a hyperlink that goes to the
# Ensembl genome browser. Change the URL if you're not using human.
Ensembl <- ifelse(Ensembl=="NA", NA,
paste("<a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=",
Ensembl, "'>", Ensembl, "</a>", sep=""))
# Make a temporary data frame with all those identifiers
tmp <- data.frame(ID=ID, Symbol=Symbol, Name=Name, Ensembl=Ensembl, stringsAsFactors=F)
# The stringsAsFactors makes "NA" characters. This fixes that problem.
tmp[tmp=="NA"] <- NA
# set the featureData for your expressionset using the data frame you created above.
fData(eset) <- tmp
# Clean up
rm(ID, Symbol, Name, Ensembl, tmp)
# Fit the model
fit <- lmFit(eset,design)
fit <- eBayes(fit)
tt <- topTable(fit, coef="tissue")
# Write out an HTML file with clickable links to the Ensembl Genome Browser.
HTML(tt, "out.html", append=F)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment