Skip to content

Instantly share code, notes, and snippets.

@saketkc
Created February 11, 2014 09:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save saketkc/8931951 to your computer and use it in GitHub Desktop.
Save saketkc/8931951 to your computer and use it in GitHub Desktop.
library(limma)
library(statmod)
library(scatterplot3d)
library(made4)
library(arrayQualityMetrics)
library(Biobase)
## We want to read ony Read channels, the way to do it is to pass custom
## names to read.maimages() function specifying the green.only=TRUE option
## with source="genepix.custom" and G,Gb=Cy5,Cy5b respectively.
## Cy5 => RED Channel Forground
## Cy5b => RED Channel Background
Cy5 <- "F635 Median"
Cy5b <- "B635 Mean"
## Read annotated targets file. File format:
## Filename Condition (two tab separated colmuns, first column specifies the exact name of the file
## and the second column "Condition" is used to specify the "class" that file belongs to , say 'Control',
## 'Grade2', 'Grade3' etc. )
targets <- readTargets("Grade2_targets.csv")
## Create a array of Conditions to be used for design matrix. For what is a design matrix
## please refer to my PDF notes
f <- factor(targets$Condition, levels = unique(targets$Condition))
design <- model.matrix(~0 + f)
colnames(design) <- levels(f)
## We want to study the contrast between Grade2VSControl
## Refer page 47 section 8.5 of limma-user's guide
## and the fold changes are now with respect to the Control, Grade2-Control translates into
## log2(G2)-log2(CO) where G2=Grade2 intensity, CO=Control intensity
cont.matrix <- makeContrasts(Grade2vsControl=Grade2-Control, levels=design)
## Read images treating G,Gb as Cy5,Cy5b as explained in the beginning
RG <- read.maimages( targets$FileName,
source="genepix.custom",
green.only=TRUE,
columns=list(G=Cy5,Gb=Cy5b)
)
#rownames(RG) <- RG$genes$ID
types <- data.frame(SpotType=c("Gene","Positive","Negative"),
ID=c("*","*","*"),
Name=c("*", "H2A|H2B|H3|H4|GST10n|GST50n|GST100n|GST500n",
"BSA|Hela cell lysate|p300-BHC"),
col=c("blue", "red", "black"))
status <- controlStatus(types, RG$genes)
E <- RG$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
fn <- paste("MA_","raw",sep="_")
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix=fn)
## Order by gene ID
#RG <- RG[order(rownames(RG)), ]
## Control Spots
controls <- grep("CONTROL", RG$genes[,"ID"])
## Regular spots
regulars <- grep("empty|Empty|CONTROL", invert=TRUE, RG$genes[,"ID"])
## Negative Controls
controls.negative <- grep("BSA|Hela cell lysate|p300-BHC", RG$genes[,"Name"])
## Positive Controls
controls.positive <- grep("H2A|H2B|H3|H4|GST10n|GST50n|GST100n|GST500n", RG$genes[,"Name"])
## Status of all genes set to regular
status <- rep("regular", nrow(RG))
## Convert status of negative controls to negative
status[controls.negative] <- "negctrl"
## Background correct using 'nec'
RG.bc.nec <- nec(RG, status=status, negctrl="negctrl", regular="regular", robust=TRUE, offset=15)
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.nec$E))
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.nec", do.logtransform=TRUE,force=TRUE)
E <- RG.bc.nec$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.nec")
write.csv(RG.bc.nec, "Results_new/RG.bc.nec.csv")
## Background correction using 'backgroundcorrect' and normexp
RG.bc.normexp <- backgroundCorrect(RG, method="normexp", offset=15)
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.normexp$E))
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.normexp", do.logtransform=TRUE,force=TRUE)
E <- RG.bc.normexp$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.normexp")
write.csv(RG.bc.normexp,"Results_new/RG.bc.normexp.csv")
## Background correct using 'backgroundcorrect' and subtract
RG.bc.subtract <- backgroundCorrect(RG, method="subtract", offset=15)
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.subtract$E))
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.subtract", do.logtransform=TRUE,force=TRUE)
E <- RG.bc.subtract$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.subtract")
write.csv(RG.bc.subtract, "Results_new/RG.bc.subtract.csv")
####### Normalisation ##############
## Normalizebetweenarrays
weights <- rep(.001,nrow(RG.bc.subtract))
weights[controls.negative] <- 100
weights[controls.positive] <- 0
RG.bc.subtract.nba <- normalizeBetweenArrays(RG.bc.subtract, method="cyclicloess", weights=weights)
write.csv(RG.bc.subtract.nba, "Results_new/RG.bc.subtract.nba.csv")
RG.bc.normexp.nba <- normalizeBetweenArrays(RG.bc.normexp, method="cyclicloess", weights=weights)
write.csv(RG.bc.normexp.nba, "Results_new/RG.bc.normexp.nba.csv")
RG.bc.nec.nba <- normalizeBetweenArrays(RG.bc.nec, method="cyclicloess", weights=weights)
write.csv(RG.bc.nec.nba, "Results_new/RG.bc.nec.nba.csv")
RG.bc.subtract.nba.final <- RG.bc.subtract.nba[regulars, ]
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.subtract.nba.final$E))
#arrayQualityMetrics(eset, outdir="Results_new/RG.bc.subtract.nba.final", do.logtransform=TRUE,force=TRUE)
#E <- RG.bc.subtract.nba.final$E
#A <- matrix(rowMeans(E),nrow(E),ncol(E))
#MA <- new("MAList",list(M=E,A=A))
#plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.subtract")
#fn <- paste("Results_new/boxplot","RG.bc.subtract.nba.final",sep="_")
#pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
#onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
#boxplot(log2(RG.bc.subtract.nba.final$E))
#dev.off()
RG.bc.normexp.nba.final <- RG.bc.normexp.nba[regulars, ]
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.normexp.nba.final$E))
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.normexp.nba.final", do.logtransform=TRUE,force=TRUE)
E <- RG.bc.normexp.nba.final$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.normexp.nba.final")
fn <- paste("Results_new/boxplot","RG.bc.subtract.nba.final",sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
boxplot(log2(RG.bc.subtract.nba.final$E), las=2)
dev.off()
RG.bc.nec.nba.final <- RG.bc.nec.nba[regulars, ]
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.nec.nba.final$E))
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.nec.nba.final", do.logtransform=TRUE,force=TRUE)
E <- RG.bc.nec.nba.final$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.nec.nba.final")
fn <- paste("Results_new/boxplot","RG.bc.nec.nba.final",sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
boxplot(log2(RG.bc.nec.nba.final$E), las=2)
dev.off()
rownames(RG.bc.subtract.nba.final) <- RG.bc.subtract.nba.final$genes$ID
RG.bc.subtract.nba.final <- RG.bc.subtract.nba.final[order(rownames(RG.bc.subtract.nba.final)), ]
rownames(RG.bc.normexp.nba.final) <- RG.bc.normexp.nba.final$genes$ID
RG.bc.normexp.nba.final <- RG.bc.normexp.nba.final[order(rownames(RG.bc.normexp.nba.final)), ]
rownames(RG.bc.nec.nba.final) <- RG.bc.nec.nba.final$genes$ID
RG.bc.nec.nba.final <- RG.bc.nec.nba.final[order(rownames(RG.bc.nec.nba.final)), ]
modes <- vector(mode="list", length=3)
names(modes) <- c("nec", "subtract", "normexp")
modes$nec <- RG.bc.nec.nba.final
modes$subtract <- RG.bc.subtract.nba.final
modes$normexp <- RG.bc.normexp.nba.final
for (i in names(all))
{
RG.final <- modes[[i]]
status <- controlStatus(types, RG.final$genes)
E <- RG.final$E
A <- matrix(rowMeans(E),nrow(E),ncol(E))
MA <- new("MAList",list(M=E,A=A))
fn <- paste("MA",i,sep="_")
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix=fn)
corfit <- duplicateCorrelation(RG.final, design, ndups=2, spacing=1)
fit <- lmFit(RG.final, design, ndups=2, correlation=corfit$consensus)
fn <- paste("Results_new/MA",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
plotMA(fit, status=status)
dev.off()
fn <- paste("Results_new/boxplot",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
boxplot(log2(RG.final$E), las=2)
dev.off()
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2)
fn <- paste("Results_new/volcano_plot",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
volcanoplot(fit2, main=i)
dev.off()
#cat("\\centerline{\\includegraphics[ext=.pdf,type=pdf,read=*,scale=1,angle=0]{", vp, "}}\n\n", sep="")
## Filter top genes using adjusted p-values,
## Adjusted using BH aka FDR!
tt<-topTable(fit2, adjust = "BH", n = nrow(RG.final)/2)
filename <- paste("Results_new/toptable_", i, ".csv", sep="")
write.csv(tt, filename)
## average over duplicates
avg <- avedups(RG.final,ndups=2,spacing=1)
filename <- paste("Results_new/average_duplicates_", i, ".csv", sep="")
write.csv(avg, filename)
fit <- lmFit(avg, design, ndups=1)
fit2 <- contrasts.fit(fit, cont.matrix)
## Computer moderated t-statisitcs, because standta t-tesets are not so useful here!
fit2 <- eBayes(fit2)
fn <- paste("Results_new/volcano_plot_avg",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
volcanoplot(fit2, main=i)
dev.off()
fn <- paste("Results_new/MA_fit",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
plotMA(fit)
dev.off()
#cat("\\centerline{\\includegraphics[ext=.pdf,type=pdf,read=*,scale=1,angle=0]{", vp, "}}\n\n", sep="")
## Filter top genes using adjusted p-values,
## Adjusted using BH aka FDR!
tt<-topTable(fit2, adjust = "BH", n =nrow(RG.final)/2)
filename <- paste("Results_new/toptable_average_duplicates_", i, ".csv", sep="")
write.csv(tt, filename)
norm.vsn <- normalizeVSN(RG.final$E)
fit.vsn <- lmFit(norm.vsn, design)
fit2.vsn <- contrasts.fit(fit.vsn, cont.matrix)
fit3.vsn <- eBayes(fit2.vsn)
volcanoplot(fit3.vsn)
status <- controlStatus(types, RG$genes)
fn <- paste("Results_new/vsn_MA",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
plotMA(fit)
dev.off()
fn <- paste("Results_new/vsn_boxplot",i,sep="_")
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16)
boxplot(norm.vsn, las=2)
dev.off()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment