Skip to content

Instantly share code, notes, and snippets.

@nuada
Last active April 14, 2016 07:07
Show Gist options
  • Save nuada/ef67c17183f650ce9955 to your computer and use it in GitHub Desktop.
Save nuada/ef67c17183f650ce9955 to your computer and use it in GitHub Desktop.
Calculate and plot sequencing coverage per target region (ex. gene).
#!/usr/bin/Rscript
library(foreach)
library(doParallel)
library(iterators)
target_filename <- commandArgs(trailingOnly=T)[1]
doc_filename <- commandArgs(trailingOnly=T)[2]
if (is.na(doc_filename) || !file.exists(doc_filename) || is.na(target_filename) || !file.exists(target_filename)) {
print('Usage: Rscript <script.R> <targets.bed> <doc file>')
quit('no', 1)
}
remove_gaps <- function (intervals, breaks) {
ddply(intervals, breaks, function (df) {
start_gapless <- rep(0, nrow(df))
end_gapless <- df$end-df$start
if (nrow(df) > 1) for (i in 2:nrow(df)) {
start_gapless[i] <- end_gapless[i-1]
end_gapless[i] <- end_gapless[i] + start_gapless[i]
}
transform(df, start_gapless=start_gapless, end_gapless=end_gapless)
})
}
targets <- read.table(target_filename, sep='\t')
names(targets) <- c('chromosome', 'start', 'end', 'gene')
doc <- read.table(doc_filename, header=T, sep='\t')
doc$run <- factor(sapply(doc$sample, function (x) strsplit(as.character(x), '.', fixed=T)[[1]][2]))
cluster <- makeCluster(detectCores(), outfile='')
registerDoParallel(cluster)
invisible(foreach (gene_name = iter(levels(targets$gene))) %dopar% {
library(ggplot2)
library(plyr)
print(paste0('Plotting gene: ', gene_name))
t <- remove_gaps(subset(targets, gene == gene_name), .(chromosome))
t1 <- t[seq(1, nrow(t), 2),]
if (nrow(t) > 1) t2 <- t[seq(2, nrow(t), 2),]
d <- subset(doc, gene == gene_name)
d <- remove_gaps(d, .(sample, chromosome))
d$position <- d$start_gapless+(d$end_gapless-d$start_gapless)/2
if (max(d$depth < 500))
breaks <- seq(0, 500, 30)
else
breaks <- seq(0, 30000, 1000)
p <- ggplot() + geom_rect(data=t1, aes(xmin=start_gapless, xmax=end_gapless, ymin=-Inf, ymax=Inf),
fill='red', linetype=0, alpha=0.1)
if (nrow(t) > 1) p <- p + geom_rect(data=t2, aes(xmin=start_gapless, xmax=end_gapless, ymin=-Inf, ymax=Inf),
fill='red', linetype=0, alpha=0.2)
p <- p + geom_line(data=d, aes(x=position, y=depth, group=sample, color=run))
p <- p + geom_hline(yintercept=30, color='red')
p <- p + scale_y_continuous(breaks=breaks)
p <- p + labs(title=gene_name)
p <- p + guides(col = guide_legend(nrow = 16))
ggsave(plot=p, filename=paste0(doc_filename, '-', gene_name, '.svg'), width=20, height=10, dpi=75)
})
stopCluster(cluster)
#!/bin/bash
[[ "x${*}" == "x" ]] && echo "usage: $0 <intervals> <window> file1 [file...]" && exit 1
cpus=4
intervals=$1
window=$2
shift 2
temp=/tmp/doc-$(date +%F-%H%M%S)
[[ -d ${temp} ]] || mkdir -p ${temp}
bedtools makewindows -b ${intervals} -w ${window} -i src > ${temp}/windows.bed
echo $* | tr ' ' '\n' | xargs -n 1 -P ${cpus} -I '{}' bash -c 'samtools view -b -f 2 {} |
coverageBed -abam - -b ${0}/windows.bed -counts | sort -k 1,1 -k 2,2n |
sed -e "s/^/$(basename -z {} .bam)\t/g" > ${0}/$(basename -z {} .bam).doc' ${temp}
echo 'sample chromosome start end gene depth' | tr ' ' '\t'
cat ${temp}/*.doc
rm -rf ${temp}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment