Created
April 24, 2012 19:09
-
-
Save anonymous/2482783 to your computer and use it in GitHub Desktop.
R script for galaxy tool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
args <- commandArgs(TRUE) | |
## inputs | |
countsTsv <- args[1] #"countsData.txt" | |
designTsv <- args[2] # "design.txt" | |
organism <- args[3] #"Fly" # one of names(org), below | |
minimumCountsPerMillion <- as.numeric(args[4]) #1L | |
minimumSamplesPerTranscript <- as.numeric(args[5]) #1L | |
dispersion <- args[6] #"Tagwise" # one of Tagwise, Common | |
pdf_outfile <- args[7] | |
csv_outfile <- args[8] | |
## set-up | |
org <- | |
setNames(c("org.Ag.eg.db", "org.At.tair.db", "org.Bt.eg.db", | |
"org.Ce.eg.db", "org.Cf.eg.db", "org.Dm.eg.db", | |
"org.Dr.eg.db", "org.Gg.eg.db", "org.Hs.eg.db", | |
"org.Mm.eg.db", "org.Mmu.eg.db", "org.Pf.plasmo.db", | |
"org.Pt.eg.db", "org.Rn.eg.db", "org.Sc.sgd.db", | |
"org.Sco.eg.db", "org.Ss.eg.db", "org.Xl.eg.db"), | |
c("Anopheles", "Arabidopsis", "Bovine", "Worm", "Canine", | |
"Fly", "Zebrafish", "Chicken", "Human", "Mouse", | |
"Rhesus", "Malaria", "Chimp", "Rat", "Yeast", | |
"Streptomyces coelicolor", "Pig", | |
"Xenopus")) | |
if (!organism %in% names(org)) | |
stop("unknown organism: ", organism) | |
outputs <- new.env(parent=emptyenv()) | |
library(edgeR) | |
library(goseq) | |
require(org[organism], character.only=TRUE) | |
## data iput | |
counts <- tryCatch({ | |
as.matrix(read.delim(countsTsv, row.names=1)) | |
}, error=function(err) { | |
stop("failed to read count data: ", conditionMessage(err)) | |
}) | |
design <- tryCatch({ | |
csv <- read.delim(designTsv, row.names=1) | |
if (!all(c("SampleId", "Group") %in% names(csv))) | |
stop("'SampleId' and 'Group' columns must be present in 'design' csv") | |
csv | |
}, error=function(err) { | |
stop("failed to read experimental design data: ", | |
conditionMessage(err)) | |
}) | |
map <- with(design, setNames(Group, SampleId)) | |
group <- tryCatch({ | |
if (!all(names(map) %in% colnames(counts))) | |
stop("not all 'SampleId' of 'design' in column names of 'counts'") | |
if (!all(colnames(counts) %in% names(map))) | |
stop("not all column names of 'counts' in 'SampleId' of 'design'") | |
if (2L != length(unique(map))) | |
stop("'Group' of 'design' must have exactly two levels") | |
map[colnames(counts)] | |
}, error=function(err) { | |
stop("failed to identify groups: ", conditionMessage(err)) | |
}) | |
## pre-processing | |
library(edgeR) | |
dge1 <- tryCatch({ | |
dge0 <- DGEList(counts, group=group) | |
calcNormFactors(dge0) | |
}, error=function(err) { | |
stop("failed to create 'DGEList' data structure: ", | |
conditionMessage(err)) | |
}) | |
dge <- tryCatch({ | |
m <- sweep(dge1$counts, 2, 1e6 / dge1$samples$lib.size, '*') | |
ridx <- rowSums(m > minimumCountsPerMillion) > minimumSamplesPerTranscript | |
dge1[ridx,] | |
}, error=function(err) { | |
stop("failed to pre-filter transcripts: ", | |
conditionMessage(err)) | |
}) | |
outputs[["RetainedAfterPreprocessing"]] <- table(ridx) | |
outputs[["MDSPlot"]] <- plotMDS.DGEList(dge) | |
## dispersion | |
model <- tryCatch({ | |
with(design, model.matrix( ~ Group)) | |
}, error=function(err) { | |
stop("failed to create model matrix: ", | |
conditionMessage(err)) | |
}) | |
dge <- tryCatch({ | |
switch(dispersion, | |
Tagwise=estimateTagwiseDisp(dge), | |
Common=estimateCommonDisp(dge, model)) | |
}, error=function(err) { | |
stop("failed to calculate dispersions: ", | |
conditionMessage(err)) | |
}) | |
outputs[["DispersionSD"]] <- sqrt(dge$common.dispersion) | |
## fit, likelihood ratio test, top-table | |
tryCatch({ | |
fit <- glmFit(dge, model, dispersion=dge$common.dispersion) | |
lrTest <- glmLRT(dge, fit, coef=2) | |
tt <- topTags(lrTest, Inf) | |
}, error=function(err) { | |
stop("failed to fit model: ", conditionMessage(err)) | |
}) | |
## summary | |
topTable <- tryCatch({ | |
grpMeans <- sapply(levels(group), function(grp, counts, map) { | |
cidx <- names(map)[map %in% grp] | |
x <- counts[,cidx] | |
rowMeans(counts[,cidx]) | |
}, counts, map) | |
colnames(grpMeans) <- sprintf("%sMeanCount", colnames(grpMeans)) | |
ridx <- rownames(tt$table) | |
cbind(tt$table, grpMeans[ridx,]) | |
}, error=function(err) { | |
stop("failed to annotate results: ", conditionMessage(err)) | |
}) | |
outputs[["TopTable"]] <-topTable[order(topTable$FDR),] | |
pdf(pdf_outfile); plot(outputs[["MDSPlot"]]); dev.off() | |
write.csv(outputs$TopTable, file=csv_outfile) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment