Skip to content

Instantly share code, notes, and snippets.

Created April 24, 2012 19:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/2482783 to your computer and use it in GitHub Desktop.
Save anonymous/2482783 to your computer and use it in GitHub Desktop.
R script for galaxy tool
args <- commandArgs(TRUE)
## inputs
countsTsv <- args[1] #"countsData.txt"
designTsv <- args[2] # "design.txt"
organism <- args[3] #"Fly" # one of names(org), below
minimumCountsPerMillion <- as.numeric(args[4]) #1L
minimumSamplesPerTranscript <- as.numeric(args[5]) #1L
dispersion <- args[6] #"Tagwise" # one of Tagwise, Common
pdf_outfile <- args[7]
csv_outfile <- args[8]
## set-up
org <-
setNames(c("org.Ag.eg.db", "org.At.tair.db", "org.Bt.eg.db",
"org.Ce.eg.db", "org.Cf.eg.db", "org.Dm.eg.db",
"org.Dr.eg.db", "org.Gg.eg.db", "org.Hs.eg.db",
"org.Mm.eg.db", "org.Mmu.eg.db", "org.Pf.plasmo.db",
"org.Pt.eg.db", "org.Rn.eg.db", "org.Sc.sgd.db",
"org.Sco.eg.db", "org.Ss.eg.db", "org.Xl.eg.db"),
c("Anopheles", "Arabidopsis", "Bovine", "Worm", "Canine",
"Fly", "Zebrafish", "Chicken", "Human", "Mouse",
"Rhesus", "Malaria", "Chimp", "Rat", "Yeast",
"Streptomyces coelicolor", "Pig",
"Xenopus"))
if (!organism %in% names(org))
stop("unknown organism: ", organism)
outputs <- new.env(parent=emptyenv())
library(edgeR)
library(goseq)
require(org[organism], character.only=TRUE)
## data iput
counts <- tryCatch({
as.matrix(read.delim(countsTsv, row.names=1))
}, error=function(err) {
stop("failed to read count data: ", conditionMessage(err))
})
design <- tryCatch({
csv <- read.delim(designTsv, row.names=1)
if (!all(c("SampleId", "Group") %in% names(csv)))
stop("'SampleId' and 'Group' columns must be present in 'design' csv")
csv
}, error=function(err) {
stop("failed to read experimental design data: ",
conditionMessage(err))
})
map <- with(design, setNames(Group, SampleId))
group <- tryCatch({
if (!all(names(map) %in% colnames(counts)))
stop("not all 'SampleId' of 'design' in column names of 'counts'")
if (!all(colnames(counts) %in% names(map)))
stop("not all column names of 'counts' in 'SampleId' of 'design'")
if (2L != length(unique(map)))
stop("'Group' of 'design' must have exactly two levels")
map[colnames(counts)]
}, error=function(err) {
stop("failed to identify groups: ", conditionMessage(err))
})
## pre-processing
library(edgeR)
dge1 <- tryCatch({
dge0 <- DGEList(counts, group=group)
calcNormFactors(dge0)
}, error=function(err) {
stop("failed to create 'DGEList' data structure: ",
conditionMessage(err))
})
dge <- tryCatch({
m <- sweep(dge1$counts, 2, 1e6 / dge1$samples$lib.size, '*')
ridx <- rowSums(m > minimumCountsPerMillion) > minimumSamplesPerTranscript
dge1[ridx,]
}, error=function(err) {
stop("failed to pre-filter transcripts: ",
conditionMessage(err))
})
outputs[["RetainedAfterPreprocessing"]] <- table(ridx)
outputs[["MDSPlot"]] <- plotMDS.DGEList(dge)
## dispersion
model <- tryCatch({
with(design, model.matrix( ~ Group))
}, error=function(err) {
stop("failed to create model matrix: ",
conditionMessage(err))
})
dge <- tryCatch({
switch(dispersion,
Tagwise=estimateTagwiseDisp(dge),
Common=estimateCommonDisp(dge, model))
}, error=function(err) {
stop("failed to calculate dispersions: ",
conditionMessage(err))
})
outputs[["DispersionSD"]] <- sqrt(dge$common.dispersion)
## fit, likelihood ratio test, top-table
tryCatch({
fit <- glmFit(dge, model, dispersion=dge$common.dispersion)
lrTest <- glmLRT(dge, fit, coef=2)
tt <- topTags(lrTest, Inf)
}, error=function(err) {
stop("failed to fit model: ", conditionMessage(err))
})
## summary
topTable <- tryCatch({
grpMeans <- sapply(levels(group), function(grp, counts, map) {
cidx <- names(map)[map %in% grp]
x <- counts[,cidx]
rowMeans(counts[,cidx])
}, counts, map)
colnames(grpMeans) <- sprintf("%sMeanCount", colnames(grpMeans))
ridx <- rownames(tt$table)
cbind(tt$table, grpMeans[ridx,])
}, error=function(err) {
stop("failed to annotate results: ", conditionMessage(err))
})
outputs[["TopTable"]] <-topTable[order(topTable$FDR),]
pdf(pdf_outfile); plot(outputs[["MDSPlot"]]); dev.off()
write.csv(outputs$TopTable, file=csv_outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment