Skip to content

Instantly share code, notes, and snippets.

@arq5x
Created June 16, 2011 18:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save arq5x/1029917 to your computer and use it in GitHub Desktop.
Save arq5x/1029917 to your computer and use it in GitHub Desktop.
Navin Main Processing
############################################################
# Index the position-sorted BAM files.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="bam-index"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; samtools index bam/$sample.*.bam" | $QSUB
done
############################################################
# Collect insert size metrics.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="picard-isize"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "java -jar /home/arq5x/cphg-home/bin/CollectInsertSizeMetrics.jar INPUT=$TUMHOME/bam/$sample.bam OUTPUT=$TUMHOME/bam/$sample.bam.isize H=$TUMHOME/bam/$sample.bam.hist STOP_AFTER=10000000" | $QSUB
done
############################################################
# Flagstat.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="flagstat"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; samtools flagstat bam/$sample.bam > bam/$sample.bam.flagstat" | $QSUB
done
############################################################
# Create name sorted BAM files.
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="namesort"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; samtools sort -n -m 2000000000 bam/$sample.bam bam/$sample.namesorted" | $QSUB
done
############################################################
# Count the number of ends for each pair
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="querycount"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=4 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; samtools view bam/$sample.namesorted.bam | cut -f 1 | uniq -c > bam/$sample.namesorted.bam.querycounts" | $QSUB
done
############################################################
# How many pairs have only one end in the BAM?
############################################################
export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="querycount"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; awk '\$1<2' bam/$sample.namesorted.bam.querycounts > bam/$sample.namesorted.bam.querycounts.lt2" | $QSUB
done
############################################################
# Create tier the clipped and discordant BAM files.
############################################################
#export SAMPLES="T10AA T10AB T10D T10H"
export SAMPLES="T10AA T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="clipper"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; clipper -i bam/$sample.namesorted.bam" | $QSUB
done
############################################################
# Create a FASTA file from the clipped (and mostly merged) clipped reads.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="merger"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; merger -i bam/$sample.namesorted.bam.clip.bam > bam/$sample.namesorted.bam.clip.bam.fasta" | $QSUB
done
############################################################
# Create a FASTQ files from the discordant reads.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export STEPNAME="merger"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; bamToFastq -i bam/$sample.namesorted.bam.disc.bam \
-fq1 bam/$sample.namesorted.bam.disc.bam.1.fq \
-fq2 bam/$sample.namesorted.bam.disc.bam.2.fq" | $QSUB
done
############################################################
# Align the sof-clipped FASTA with BWA-SW for split-read detection.
# Z=10
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bwa-sw"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=10 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; bwa bwasw -t 8 -z 10 $GENOME \
bam/$sample.namesorted.bam.clip.bam.fasta | \
samtools view -Sb - \
> bam/$sample.namesorted.bam.clip.bam.fasta.bam" | $QSUB
done
############################################################
# Align the sof-clipped FASTA with BWA-SW for split-read detection.
# Z=1
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bwa-sw"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=10 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; bwa bwasw -t 8 $GENOME \
bam/$sample.namesorted.bam.clip.bam.fasta | \
samtools view -Sb - \
> bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam" | $QSUB
done
############################################################
# Align the disc. with Novoalign.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/novoalign/hg18_full.k15s2.novoindex
export STEPNAME="bwa-sw"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=24000m:ncpus=10 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; novoalign -c 8 -d $GENOME -f bam/$sample.namesorted.bam.disc.bam.1.fq bam/$sample.namesorted.bam.disc.bam.2.fq -i 180 36 -r Random -o SAM | samtools view -Sb - > bam/$sample.namesorted.bam.disc.bam" | $QSUB
done
############################################################
# Sort/index the BWA-SW bams by position
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bwa-sw"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=2 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; samtools sort -m 2000000000 \
bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam \
bam/$sample.namesorted.bam.clip.bam.fasta.z1.possrt; \
samtools index bam/$sample.namesorted.bam.clip.bam.fasta.possrt.z1.bam;" | $QSUB
done
############################################################
# split read BWA-SW to BEDPE.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="bedpe"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=6 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; samtools view bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam | \
splitReadSamToBedpe -i stdin \
> bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam.bedpe" | $QSUB
done
############################################################
# Identify splitters from the BWA-SW alignments.
############################################################
export SAMPLES="T10AA T10D T10H"
#export SAMPLES="T10AA T10AB T10D T10H"
export TUMHOME=/home/arq5x/cphg-home/projects/navin-tumor-heterogeneity/
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full
export STEPNAME="splitters"
for sample in `echo $SAMPLES`
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=6 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $TUMHOME; awk '\$15>=25' bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam.bedpe | \
splitterToBreakpoint -i stdin \
> bam/$sample.namesorted.bam.clip.bam.fasta.z1.bam.splitters" | $QSUB
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment