Skip to content

Instantly share code, notes, and snippets.

@hliang
Last active July 31, 2018 01:26
Show Gist options
  • Save hliang/69a9658f81d1cb07df902c973b1b4986 to your computer and use it in GitHub Desktop.
Save hliang/69a9658f81d1cb07df902c973b1b4986 to your computer and use it in GitHub Desktop.
# Assumes you've already run coverageBed -hist, and grep'd '^all'. E.g. something like:
# find *.bam | parallel 'bedtools -abam {} -b capture.bed -hist | grep ^all > {}.all.txt'
setwd("./qc/")
# Get a list of the bedtools output files you'd like to read in
bedcov_dir = "./bedcov_cds_canol/"
print(files <- list.files(path=bedcov_dir, recursive=TRUE, pattern="samp.*all.txt$", full.names=TRUE))
# This regular expression leaves me with "samp01", "samp02", and "samp03" in the legend.
print(labs <- gsub(".*/(.*)-ready.bam.hist.all.txt", "\\1", files))
# Create lists to hold coverage and cumulative coverage for each alignment,
# and read the data into these lists.
cov <- list()
cov_cumul <- list()
for (i in 1:length(files)) {
cov[[labs[i]]] <- read.table(files[i])
cov_cumul[[labs[i]]] <- 1-cumsum(cov[[i]][,5])
}
# Pick some colors
# Ugly:
# cols <- 1:length(cov)
cols <- rainbow(length(cov))
# Prettier:
# ?colorRampPalette
# display.brewer.all()d
# library(RColorBrewer)
# cols <- brewer.pal(length(cov), "Dark2")
# Save the graph to a file
#png("exome-coverage-plots.png", h=1000, w=1000, pointsize=20)
prefix = gsub(".*bedcov_?([^/]*)/?.*", "\\1", bedcov_dir)
pdfname = paste0("exome-coverage.", prefix, ".pdf")
pdf(pdfname, width=10, height=8)
# Create plot area, but do not plot anything. Add gridlines and axis labels.
plot(cov[[1]][2:401, 2], cov_cumul[[1]][1:400], type='n', xlab="Depth", ylab="Fraction of capture target bases >= depth", ylim=c(0,1.0), main="Target Region Coverage", xlim=c(0, 250))
abline(v = 20, col = "gray60")
abline(v = 50, col = "gray60")
abline(v = 80, col = "gray60")
abline(v = 100, col = "gray60")
abline(h = 0.50, col = "gray60")
abline(h = 0.90, col = "gray60")
axis(1, at=c(20,50,80), labels=c(20,50,80))
axis(2, at=c(0.90), labels=c(0.90))
axis(2, at=c(0.50), labels=c(0.50))
# Actually plot the data for each of the alignments (stored in the lists).
for (i in 1:length(cov)) points(cov[[i]][2:401, 2], cov_cumul[[i]][1:400], type='l', lwd=1, col=cols[i])
orderByDepth = 50
depth_cut = sapply(cov_cumul, function(x) return(x[orderByDepth]))
oo = order(depth_cut, decreasing=TRUE)
# Add a legend using the nice sample labeles rather than the full filenames.
legend("topright", legend=labs[oo], col=cols[oo], lty=1, lwd=1, cex=1, title=paste0("labels ordered by depth", orderByDepth), bg=rgb(1, 1, 1, .9))
abline(v = 50, col = "red", lty=2)
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment