Skip to content

Instantly share code, notes, and snippets.

@mikelove
Created March 30, 2016 21:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mikelove/f3dd43f7a02765f40f9b17a5fb5fe8b7 to your computer and use it in GitHub Desktop.
Save mikelove/f3dd43f7a02765f40f9b17a5fb5fe8b7 to your computer and use it in GitHub Desktop.
null comparisons 42 WT
files <- list.files("files","gbgout")
exclude <- paste0(c("WT_rep21_MID62_allLanes_tophat2.0.5.bam",
"WT_rep22_MID67_allLanes_tophat2.0.5.bam",
"WT_rep25_MID61_allLanes_tophat2.0.5.bam",
"WT_rep28_MID64_allLanes_tophat2.0.5.bam",
"WT_rep34_MID76_allLanes_tophat2.0.5.bam",
"WT_rep36_MID63_allLanes_tophat2.0.5.bam"), ".gbgout")
files <- files[!files %in% exclude]
length(files)
library(DESeq2)
st <- data.frame(id=seq_along(files),filename=files,cond=rep("wt",length(files)))
dds <- DESeqDataSetFromHTSeqCount(st, "files", ~1)
dds <- dds[rowSums(counts(dds)) > 0,]
#vsd <- varianceStabilizingTransformation(dds)
#plotPCA(vsd, "cond")
library(parallel)
options(mc.cores=25)
ns <- 3:20
system.time({res <- lapply(ns, function(n) {
cat("\n n =",n,"\n")
mclapply(seq_len(100), function(i) {
cat(i)
idx <- sample(ncol(dds),2*n)
dds.sub <- dds[,idx]
dds.sub <- dds.sub[rowSums(counts(dds.sub)) > 0,]
dds.sub$cond <- factor(rep(c("A","B"),each=n))
design(dds.sub) <- ~cond
dds.sub <- DESeq(dds.sub, quiet=TRUE)
res <- results(dds.sub)
nfp <- sum(res$padj < .05, na.rm=TRUE)
nfp / nrow(dds.sub)
})
})
})
save(res, dds, ns, file="res_grid.rda")
###
library(DESeq2)
load("res_grid.rda")
fpr <- sapply(res, function(x) do.call(c, x))
colnames(fpr) <- ns
colMeans(fpr < 0.05)
meds <- apply(fpr, 2, median)
quant75 <- apply(fpr, 2, quantile, .75)
quant95 <- apply(fpr, 2, quantile, .95)
outliers <- lapply(1:ncol(fpr), function(j) fpr[fpr[,j] > quantile(fpr[,j], .95),j])
plot(ns, meds, type="n", ylim=c(0,1), ylab="FP fraction", xlab="num replicates")
lines(ns, meds, col="black")
lines(ns, quant75, col="blue")
lines(ns, quant95, col="purple")
abline(h=0.05, col="red")
for (i in seq_along(ns)) {
if (length(outliers[[i]]) > 0) {
points(rep(ns[i],length(outliers[[i]])), outliers[[i]], pch=20, cex=.5)
}
}
legend("topright", c("median","75","95"), col=c("black","blue","purple"), lwd=1, cex=1.2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment