stephenturner/annotatelimma.r

## annotatelimma.r
# Install the necessary bioconductor packages. Use the appropriate annotation.db file
# for your organism/platform. See http://www.bioconductor.org/packages/release/data/annotation/
source("http://www.bioconductor.org/biocLite.R")
biocLite("affy")
biocLite("limma")
biocLite("annotate")
biocLite("hugene10sttranscriptcluster.db")
install.packages("R2HTML")

# Load required packages
library(affy)
library(limma)
library(annotate)
library(hugene10sttranscriptcluster.db)
library(R2HTML)

# RMA to convert Affybatch to ExpressionSet using robust multichip average
eset <- rma(myaffybatch)
class(eset)

# Which platform?
eset@annotation

# Which objects are in this annotation package?
ls("package:hugene10sttranscriptcluster.db")

# Get the transcript cluster IDs from the expressionset
ID <- featureNames(eset)

# Look up the Gene Symbol, name, and Ensembl Gene ID for each of those IDs
Symbol <- getSYMBOL(ID, "hugene10sttranscriptcluster.db")
Name <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "GENENAME"))
Ensembl <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "ENSEMBL"))
# Note: looking up Ensembl gene IDs by Affy probe IDs can sometimes result in multiple
# gene IDs being returned. This is why the default output from lookUp is a list.
# I'm coercing this list into a character vector so it's the same size as my list of IDs,
# But this is discarding information where one probe ID maps to multiple gene IDs.
# If you're aware of a better solution, please leave it in the comments! Thanks.

# Wherever you have a missing Ensembl ID, make hyperlink missing also.
# If you have an Ensembl ID, create a hyperlink that goes to the
# Ensembl genome browser. Change the URL if you're not using human.
Ensembl <- ifelse(Ensembl=="NA", NA,
                  paste("<a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=",
                        Ensembl, "'>", Ensembl, "</a>", sep=""))

# Make a temporary data frame with all those identifiers
tmp <- data.frame(ID=ID, Symbol=Symbol, Name=Name, Ensembl=Ensembl, stringsAsFactors=F)

# The stringsAsFactors makes "NA" characters. This fixes that problem.
tmp[tmp=="NA"] <- NA

# set the featureData for your expressionset using the data frame you created above.
fData(eset) <- tmp

# Clean up
rm(ID, Symbol, Name, Ensembl, tmp)

# Fit the model
fit <- lmFit(eset,design)
fit <- eBayes(fit)
tt  <- topTable(fit, coef="tissue")

# Write out an HTML file with clickable links to the Ensembl Genome Browser.
HTML(tt, "out.html", append=F)
	# Install the necessary bioconductor packages. Use the appropriate annotation.db file
	# for your organism/platform. See http://www.bioconductor.org/packages/release/data/annotation/
	source("http://www.bioconductor.org/biocLite.R")
	biocLite("affy")
	biocLite("limma")
	biocLite("annotate")
	biocLite("hugene10sttranscriptcluster.db")
	install.packages("R2HTML")

	# Load required packages
	library(affy)
	library(limma)
	library(annotate)
	library(hugene10sttranscriptcluster.db)
	library(R2HTML)

	# RMA to convert Affybatch to ExpressionSet using robust multichip average
	eset <- rma(myaffybatch)
	class(eset)

	# Which platform?
	eset@annotation

	# Which objects are in this annotation package?
	ls("package:hugene10sttranscriptcluster.db")

	# Get the transcript cluster IDs from the expressionset
	ID <- featureNames(eset)

	# Look up the Gene Symbol, name, and Ensembl Gene ID for each of those IDs
	Symbol <- getSYMBOL(ID, "hugene10sttranscriptcluster.db")
	Name <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "GENENAME"))
	Ensembl <- as.character(lookUp(ID, "hugene10sttranscriptcluster.db", "ENSEMBL"))
	# Note: looking up Ensembl gene IDs by Affy probe IDs can sometimes result in multiple
	# gene IDs being returned. This is why the default output from lookUp is a list.
	# I'm coercing this list into a character vector so it's the same size as my list of IDs,
	# But this is discarding information where one probe ID maps to multiple gene IDs.
	# If you're aware of a better solution, please leave it in the comments! Thanks.

	# Wherever you have a missing Ensembl ID, make hyperlink missing also.
	# If you have an Ensembl ID, create a hyperlink that goes to the
	# Ensembl genome browser. Change the URL if you're not using human.
	Ensembl <- ifelse(Ensembl=="NA", NA,
	paste("<a href='http://useast.ensembl.org/Homo_sapiens/Gene/Summary?g=",
	Ensembl, "'>", Ensembl, "</a>", sep=""))

	# Make a temporary data frame with all those identifiers
	tmp <- data.frame(ID=ID, Symbol=Symbol, Name=Name, Ensembl=Ensembl, stringsAsFactors=F)

	# The stringsAsFactors makes "NA" characters. This fixes that problem.
	tmp[tmp=="NA"] <- NA

	# set the featureData for your expressionset using the data frame you created above.
	fData(eset) <- tmp

	# Clean up
	rm(ID, Symbol, Name, Ensembl, tmp)

	# Fit the model
	fit <- lmFit(eset,design)
	fit <- eBayes(fit)
	tt <- topTable(fit, coef="tissue")

	# Write out an HTML file with clickable links to the Ensembl Genome Browser.
	HTML(tt, "out.html", append=F)