Created
February 11, 2014 09:46
-
-
Save saketkc/8931951 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(limma) | |
library(statmod) | |
library(scatterplot3d) | |
library(made4) | |
library(arrayQualityMetrics) | |
library(Biobase) | |
## We want to read ony Read channels, the way to do it is to pass custom | |
## names to read.maimages() function specifying the green.only=TRUE option | |
## with source="genepix.custom" and G,Gb=Cy5,Cy5b respectively. | |
## Cy5 => RED Channel Forground | |
## Cy5b => RED Channel Background | |
Cy5 <- "F635 Median" | |
Cy5b <- "B635 Mean" | |
## Read annotated targets file. File format: | |
## Filename Condition (two tab separated colmuns, first column specifies the exact name of the file | |
## and the second column "Condition" is used to specify the "class" that file belongs to , say 'Control', | |
## 'Grade2', 'Grade3' etc. ) | |
targets <- readTargets("Grade2_targets.csv") | |
## Create a array of Conditions to be used for design matrix. For what is a design matrix | |
## please refer to my PDF notes | |
f <- factor(targets$Condition, levels = unique(targets$Condition)) | |
design <- model.matrix(~0 + f) | |
colnames(design) <- levels(f) | |
## We want to study the contrast between Grade2VSControl | |
## Refer page 47 section 8.5 of limma-user's guide | |
## and the fold changes are now with respect to the Control, Grade2-Control translates into | |
## log2(G2)-log2(CO) where G2=Grade2 intensity, CO=Control intensity | |
cont.matrix <- makeContrasts(Grade2vsControl=Grade2-Control, levels=design) | |
## Read images treating G,Gb as Cy5,Cy5b as explained in the beginning | |
RG <- read.maimages( targets$FileName, | |
source="genepix.custom", | |
green.only=TRUE, | |
columns=list(G=Cy5,Gb=Cy5b) | |
) | |
#rownames(RG) <- RG$genes$ID | |
types <- data.frame(SpotType=c("Gene","Positive","Negative"), | |
ID=c("*","*","*"), | |
Name=c("*", "H2A|H2B|H3|H4|GST10n|GST50n|GST100n|GST500n", | |
"BSA|Hela cell lysate|p300-BHC"), | |
col=c("blue", "red", "black")) | |
status <- controlStatus(types, RG$genes) | |
E <- RG$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
fn <- paste("MA_","raw",sep="_") | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix=fn) | |
## Order by gene ID | |
#RG <- RG[order(rownames(RG)), ] | |
## Control Spots | |
controls <- grep("CONTROL", RG$genes[,"ID"]) | |
## Regular spots | |
regulars <- grep("empty|Empty|CONTROL", invert=TRUE, RG$genes[,"ID"]) | |
## Negative Controls | |
controls.negative <- grep("BSA|Hela cell lysate|p300-BHC", RG$genes[,"Name"]) | |
## Positive Controls | |
controls.positive <- grep("H2A|H2B|H3|H4|GST10n|GST50n|GST100n|GST500n", RG$genes[,"Name"]) | |
## Status of all genes set to regular | |
status <- rep("regular", nrow(RG)) | |
## Convert status of negative controls to negative | |
status[controls.negative] <- "negctrl" | |
## Background correct using 'nec' | |
RG.bc.nec <- nec(RG, status=status, negctrl="negctrl", regular="regular", robust=TRUE, offset=15) | |
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.nec$E)) | |
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.nec", do.logtransform=TRUE,force=TRUE) | |
E <- RG.bc.nec$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.nec") | |
write.csv(RG.bc.nec, "Results_new/RG.bc.nec.csv") | |
## Background correction using 'backgroundcorrect' and normexp | |
RG.bc.normexp <- backgroundCorrect(RG, method="normexp", offset=15) | |
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.normexp$E)) | |
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.normexp", do.logtransform=TRUE,force=TRUE) | |
E <- RG.bc.normexp$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.normexp") | |
write.csv(RG.bc.normexp,"Results_new/RG.bc.normexp.csv") | |
## Background correct using 'backgroundcorrect' and subtract | |
RG.bc.subtract <- backgroundCorrect(RG, method="subtract", offset=15) | |
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.subtract$E)) | |
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.subtract", do.logtransform=TRUE,force=TRUE) | |
E <- RG.bc.subtract$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.subtract") | |
write.csv(RG.bc.subtract, "Results_new/RG.bc.subtract.csv") | |
####### Normalisation ############## | |
## Normalizebetweenarrays | |
weights <- rep(.001,nrow(RG.bc.subtract)) | |
weights[controls.negative] <- 100 | |
weights[controls.positive] <- 0 | |
RG.bc.subtract.nba <- normalizeBetweenArrays(RG.bc.subtract, method="cyclicloess", weights=weights) | |
write.csv(RG.bc.subtract.nba, "Results_new/RG.bc.subtract.nba.csv") | |
RG.bc.normexp.nba <- normalizeBetweenArrays(RG.bc.normexp, method="cyclicloess", weights=weights) | |
write.csv(RG.bc.normexp.nba, "Results_new/RG.bc.normexp.nba.csv") | |
RG.bc.nec.nba <- normalizeBetweenArrays(RG.bc.nec, method="cyclicloess", weights=weights) | |
write.csv(RG.bc.nec.nba, "Results_new/RG.bc.nec.nba.csv") | |
RG.bc.subtract.nba.final <- RG.bc.subtract.nba[regulars, ] | |
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.subtract.nba.final$E)) | |
#arrayQualityMetrics(eset, outdir="Results_new/RG.bc.subtract.nba.final", do.logtransform=TRUE,force=TRUE) | |
#E <- RG.bc.subtract.nba.final$E | |
#A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
#MA <- new("MAList",list(M=E,A=A)) | |
#plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.subtract") | |
#fn <- paste("Results_new/boxplot","RG.bc.subtract.nba.final",sep="_") | |
#pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
#onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
#boxplot(log2(RG.bc.subtract.nba.final$E)) | |
#dev.off() | |
RG.bc.normexp.nba.final <- RG.bc.normexp.nba[regulars, ] | |
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.normexp.nba.final$E)) | |
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.normexp.nba.final", do.logtransform=TRUE,force=TRUE) | |
E <- RG.bc.normexp.nba.final$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.normexp.nba.final") | |
fn <- paste("Results_new/boxplot","RG.bc.subtract.nba.final",sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
boxplot(log2(RG.bc.subtract.nba.final$E), las=2) | |
dev.off() | |
RG.bc.nec.nba.final <- RG.bc.nec.nba[regulars, ] | |
eset <- ExpressionSet(assayData = assayDataNew(exprs = RG.bc.nec.nba.final$E)) | |
arrayQualityMetrics(eset, outdir="Results_new/RG.bc.nec.nba.final", do.logtransform=TRUE,force=TRUE) | |
E <- RG.bc.nec.nba.final$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix="MA_RG.bc.nec.nba.final") | |
fn <- paste("Results_new/boxplot","RG.bc.nec.nba.final",sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
boxplot(log2(RG.bc.nec.nba.final$E), las=2) | |
dev.off() | |
rownames(RG.bc.subtract.nba.final) <- RG.bc.subtract.nba.final$genes$ID | |
RG.bc.subtract.nba.final <- RG.bc.subtract.nba.final[order(rownames(RG.bc.subtract.nba.final)), ] | |
rownames(RG.bc.normexp.nba.final) <- RG.bc.normexp.nba.final$genes$ID | |
RG.bc.normexp.nba.final <- RG.bc.normexp.nba.final[order(rownames(RG.bc.normexp.nba.final)), ] | |
rownames(RG.bc.nec.nba.final) <- RG.bc.nec.nba.final$genes$ID | |
RG.bc.nec.nba.final <- RG.bc.nec.nba.final[order(rownames(RG.bc.nec.nba.final)), ] | |
modes <- vector(mode="list", length=3) | |
names(modes) <- c("nec", "subtract", "normexp") | |
modes$nec <- RG.bc.nec.nba.final | |
modes$subtract <- RG.bc.subtract.nba.final | |
modes$normexp <- RG.bc.normexp.nba.final | |
for (i in names(all)) | |
{ | |
RG.final <- modes[[i]] | |
status <- controlStatus(types, RG.final$genes) | |
E <- RG.final$E | |
A <- matrix(rowMeans(E),nrow(E),ncol(E)) | |
MA <- new("MAList",list(M=E,A=A)) | |
fn <- paste("MA",i,sep="_") | |
plotMA3by2(MA,status=status, device="pdf", path="Results_new/", prefix=fn) | |
corfit <- duplicateCorrelation(RG.final, design, ndups=2, spacing=1) | |
fit <- lmFit(RG.final, design, ndups=2, correlation=corfit$consensus) | |
fn <- paste("Results_new/MA",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
plotMA(fit, status=status) | |
dev.off() | |
fn <- paste("Results_new/boxplot",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
boxplot(log2(RG.final$E), las=2) | |
dev.off() | |
fit2 <- contrasts.fit(fit, cont.matrix) | |
fit2 <- eBayes(fit2) | |
fn <- paste("Results_new/volcano_plot",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
volcanoplot(fit2, main=i) | |
dev.off() | |
#cat("\\centerline{\\includegraphics[ext=.pdf,type=pdf,read=*,scale=1,angle=0]{", vp, "}}\n\n", sep="") | |
## Filter top genes using adjusted p-values, | |
## Adjusted using BH aka FDR! | |
tt<-topTable(fit2, adjust = "BH", n = nrow(RG.final)/2) | |
filename <- paste("Results_new/toptable_", i, ".csv", sep="") | |
write.csv(tt, filename) | |
## average over duplicates | |
avg <- avedups(RG.final,ndups=2,spacing=1) | |
filename <- paste("Results_new/average_duplicates_", i, ".csv", sep="") | |
write.csv(avg, filename) | |
fit <- lmFit(avg, design, ndups=1) | |
fit2 <- contrasts.fit(fit, cont.matrix) | |
## Computer moderated t-statisitcs, because standta t-tesets are not so useful here! | |
fit2 <- eBayes(fit2) | |
fn <- paste("Results_new/volcano_plot_avg",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10,onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
volcanoplot(fit2, main=i) | |
dev.off() | |
fn <- paste("Results_new/MA_fit",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
plotMA(fit) | |
dev.off() | |
#cat("\\centerline{\\includegraphics[ext=.pdf,type=pdf,read=*,scale=1,angle=0]{", vp, "}}\n\n", sep="") | |
## Filter top genes using adjusted p-values, | |
## Adjusted using BH aka FDR! | |
tt<-topTable(fit2, adjust = "BH", n =nrow(RG.final)/2) | |
filename <- paste("Results_new/toptable_average_duplicates_", i, ".csv", sep="") | |
write.csv(tt, filename) | |
norm.vsn <- normalizeVSN(RG.final$E) | |
fit.vsn <- lmFit(norm.vsn, design) | |
fit2.vsn <- contrasts.fit(fit.vsn, cont.matrix) | |
fit3.vsn <- eBayes(fit2.vsn) | |
volcanoplot(fit3.vsn) | |
status <- controlStatus(types, RG$genes) | |
fn <- paste("Results_new/vsn_MA",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
plotMA(fit) | |
dev.off() | |
fn <- paste("Results_new/vsn_boxplot",i,sep="_") | |
pdf(file=paste(fn,".pdf",sep=""),width=14,height=10, | |
onefile=TRUE, family='Helvetica', paper='legal', pointsize=16) | |
boxplot(norm.vsn, las=2) | |
dev.off() | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment