Created
June 16, 2011 18:39
-
-
Save arq5x/1029917 to your computer and use it in GitHub Desktop.
Navin Main Processing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################################ | |
# Index the position-sorted BAM files. | |
############################################################ | |
export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="bam-index" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; samtools index bam/$sample.*.bam" | $QSUB | |
done | |
############################################################ | |
# Collect insert size metrics. | |
############################################################ | |
export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="picard-isize" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "java -jar /home/arq5x/cphg-home/bin/CollectInsertSizeMetrics.jar INPUT=$TUMHOME/bam/$sample.bam OUTPUT=$TUMHOME/bam/$sample.bam.isize H=$TUMHOME/bam/$sample.bam.hist STOP_AFTER=10000000" | $QSUB | |
done | |
############################################################ | |
# Flagstat. | |
############################################################ | |
export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="flagstat" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; samtools flagstat bam/$sample.bam > bam/$sample.bam.flagstat" | $QSUB | |
done | |
############################################################ | |
# Create name sorted BAM files. | |
############################################################ | |
export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="namesort" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; samtools sort -n -m 2000000000 bam/$sample.bam bam/$sample.namesorted" | $QSUB | |
done | |
############################################################ | |
# Count the number of ends for each pair | |
############################################################ | |
export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="querycount" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=4 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; samtools view bam/$sample.namesorted.bam | cut -f 1 | uniq -c > bam/$sample.namesorted.bam.querycounts" | $QSUB | |
done | |
############################################################ | |
# How many pairs have only one end in the BAM? | |
############################################################ | |
export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="querycount" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; awk '\$1<2' bam/$sample.namesorted.bam.querycounts > bam/$sample.namesorted.bam.querycounts.lt2" | $QSUB | |
done | |
############################################################ | |
# Create tier the clipped and discordant BAM files. | |
############################################################ | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export SAMPLES="T10AA T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="clipper" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; clipper -i bam/$sample.namesorted.bam" | $QSUB | |
done | |
############################################################ | |
# Create a FASTA file from the clipped (and mostly merged) clipped reads. | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="merger" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; merger -i bam/$sample.namesorted.bam.clip.bam > bam/$sample.namesorted.bam.clip.bam.fasta" | $QSUB | |
done | |
############################################################ | |
# Create a FASTQ files from the discordant reads. | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export STEPNAME="merger" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; bamToFastq -i bam/$sample.namesorted.bam.disc.bam \ | |
-fq1 bam/$sample.namesorted.bam.disc.bam.1.fq \ | |
-fq2 bam/$sample.namesorted.bam.disc.bam.2.fq" | $QSUB | |
done | |
############################################################ | |
# Align the sof-clipped FASTA with BWA-SW for split-read detection. | |
# Z=10 | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full | |
export STEPNAME="bwa-sw" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=10 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; bwa bwasw -t 8 -z 10 $GENOME \ | |
bam/$sample.namesorted.bam.clip.bam.fasta | \ | |
samtools view -Sb - \ | |
> bam/$sample.namesorted.bam.clip.bam.fasta.bam" | $QSUB | |
done | |
############################################################ | |
# Align the sof-clipped FASTA with BWA-SW for split-read detection. | |
# Z=1 | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full | |
export STEPNAME="bwa-sw" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=10 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; bwa bwasw -t 8 $GENOME \ | |
bam/$sample.namesorted.bam.clip.bam.fasta | \ | |
samtools view -Sb - \ | |
> bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam" | $QSUB | |
done | |
############################################################ | |
# Align the disc. with Novoalign. | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/novoalign/hg18_full.k15s2.novoindex | |
export STEPNAME="bwa-sw" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=24000m:ncpus=10 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; novoalign -c 8 -d $GENOME -f bam/$sample.namesorted.bam.disc.bam.1.fq bam/$sample.namesorted.bam.disc.bam.2.fq -i 180 36 -r Random -o SAM | samtools view -Sb - > bam/$sample.namesorted.bam.disc.bam" | $QSUB | |
done | |
############################################################ | |
# Sort/index the BWA-SW bams by position | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full | |
export STEPNAME="bwa-sw" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=2 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; samtools sort -m 2000000000 \ | |
bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam \ | |
bam/$sample.namesorted.bam.clip.bam.fasta.z1.possrt; \ | |
samtools index bam/$sample.namesorted.bam.clip.bam.fasta.possrt.z1.bam;" | $QSUB | |
done | |
############################################################ | |
# split read BWA-SW to BEDPE. | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full | |
export STEPNAME="bedpe" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=6 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; samtools view bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam | \ | |
splitReadSamToBedpe -i stdin \ | |
> bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam.bedpe" | $QSUB | |
done | |
############################################################ | |
# Identify splitters from the BWA-SW alignments. | |
############################################################ | |
export SAMPLES="T10AA T10D T10H" | |
#export SAMPLES="T10AA T10AB T10D T10H" | |
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/ | |
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full | |
export STEPNAME="splitters" | |
for sample in `echo $SAMPLES` | |
do | |
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=6 -N $STEPNAME -m bea -M arq5x@virginia.edu" | |
echo "cd $TUMHOME; awk '\$15>=25' bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam.bedpe | \ | |
splitterToBreakpoint -i stdin \ | |
> bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam.splitters" | $QSUB | |
done |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment