Created
January 17, 2012 18:15
annotatelimma.r
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the necessary bioconductor packages. Use the appropriate annotation.db file | |
# for your organism/platform. See http://www.bioconductor.org/packages/release/data/annotation/ | |
source("http://www.bioconductor.org/biocLite.R") | |
biocLite("affy") | |
biocLite("limma") | |
biocLite("annotate") | |
biocLite("hugene10sttranscriptcluster.db") | |
install.packages("R2HTML") | |
# Load required packages | |
library(affy) | |
library(limma) | |
library(annotate) | |
library(hugene10sttranscriptcluster.db) | |
library(R2HTML) | |
# RMA to convert Affybatch to ExpressionSet using robust multichip average | |
eset <- rma(myaffybatch) | |
class(eset) | |
# Which platform? | |
eset@annotation | |
# Which objects are in this annotation package? | |
ls("package:hugene10sttranscriptcluster.db") | |
# Get the transcript cluster IDs from the expressionset | |
ID <- featureNames(eset) | |
# Look up the Gene Symbol, name, and Ensembl Gene ID for each of those IDs | |
Symbol <- getSYMBOL(ID, "hugene10sttranscriptcluster.db") | |
Name <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "GENENAME")) | |
Ensembl <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "ENSEMBL")) | |
# Note: looking up Ensembl gene IDs by Affy probe IDs can sometimes result in multiple | |
# gene IDs being returned. This is why the default output from lookUp is a list. | |
# I'm coercing this list into a character vector so it's the same size as my list of IDs, | |
# But this is discarding information where one probe ID maps to multiple gene IDs. | |
# If you're aware of a better solution, please leave it in the comments! Thanks. | |
# Wherever you have a missing Ensembl ID, make hyperlink missing also. | |
# If you have an Ensembl ID, create a hyperlink that goes to the | |
# Ensembl genome browser. Change the URL if you're not using human. | |
Ensembl <- ifelse(Ensembl=="NA", NA, | |
paste("<a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=", | |
Ensembl, "'>", Ensembl, "</a>", sep="")) | |
# Make a temporary data frame with all those identifiers | |
tmp <- data.frame(ID=ID, Symbol=Symbol, Name=Name, Ensembl=Ensembl, stringsAsFactors=F) | |
# The stringsAsFactors makes "NA" characters. This fixes that problem. | |
tmp[tmp=="NA"] <- NA | |
# set the featureData for your expressionset using the data frame you created above. | |
fData(eset) <- tmp | |
# Clean up | |
rm(ID, Symbol, Name, Ensembl, tmp) | |
# Fit the model | |
fit <- lmFit(eset,design) | |
fit <- eBayes(fit) | |
tt <- topTable(fit, coef="tissue") | |
# Write out an HTML file with clickable links to the Ensembl Genome Browser. | |
HTML(tt, "out.html", append=F) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment