Skip to content

Instantly share code, notes, and snippets.

@arq5x
Last active December 23, 2015 05:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arq5x/6588467 to your computer and use it in GitHub Desktop.
Save arq5x/6588467 to your computer and use it in GitHub Desktop.
irradiated clones
############################################################
# Novoalign
############################################################
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa.novo.k14.s1.idx
export IRCHOME=/net/midtier18/vol79/cphg-quinlan2/projects/irradiated-clones
export STEPNAME=ircnovo
export QSUB="qsub -W group_list=cphg_arq5x -q arq5xlab -V -l select=1:mem=32000m:ncpus=16 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:parental\tSM:parental' -r Random \
-f fastq/CgmW_AGTCAA_L001_R1.fastq.gz fastq/CgmW_AGTCAA_L001_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/parental-L001.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:parental\tSM:parental' -r Random \
-f fastq/CgmW_AGTCAA_L002_R1.fastq.gz fastq/CgmW_AGTCAA_L002_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/parental-L002.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:parental\tSM:parental' -r Random \
-f fastq/CgmW_AGTCAA_L003_R1.fastq.gz fastq/CgmW_AGTCAA_L003_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/parental-L003.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:mock\tSM:mock' -r Random \
-f fastq/CRW5_AGTTCC_L004_R1.fastq.gz fastq/CRW5_AGTTCC_L004_R2.fastq.gz \
-c15 \
| samtools view -Sb - > bam/mock-L004.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:3Gy\tSM:3Gy' -r Random \
-f fastq/C13W_ATGTCA_L005_R1.fastq.gz fastq/C13W_ATGTCA_L005_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/3Gy-L005.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:6Gy_1\tSM:6Gy_1' -r Random \
-f fastq/C2AW_CCGTCC_L006_R1.fastq.gz fastq/C2AW_CCGTCC_L006_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/6Gy_1-L006.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:6Gy_2\tSM:6Gy_2' -r Random \
-f fastq/C11W_GTAGAG_L007_R1.fastq.gz fastq/C11W_GTAGAG_L007_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/6Gy_2-L007.novoalign.rg.bam" | $QSUB
echo "cd $IRCHOME; novoalign -d $GENOME -o SAM $'@RG\tID:6Gy_3\tSM:6Gy_3' -r Random \
-f fastq/C29W_GTCCGC_L008_R1.fastq.gz fastq/C29W_GTCCGC_L008_R2.fastq.gz \
-c 15 \
| samtools view -Sb - > bam/6Gy_3-L008.novoalign.rg.bam" | $QSUB
export IRRHOME=/net/midtier18/vol79/cphg-quinlan2/projects/irradiated-clones
export STEPNAME=irrcount
export QSUB="qsub -W group_list=cphg_arq5x -q arq5xlab -V -l select=1:mem=1000m:ncpus=2 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $IRRHOME; samtools view -b -q 1 bam/3Gy-L005.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/3Gy-L005.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/6Gy_1-L006.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/6Gy_1-L006.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/6Gy_2-L007.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/6Gy_2-L007.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/6Gy_3-L008.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/6Gy_3-L008.novoalign.rg.bam.depths.5kb.q1.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/mock-L004.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/mock-L004.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/parental-L001.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/parental-L001.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/parental-L002.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/parental-L002.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
echo "cd $IRRHOME; samtools view -b -q 1 bam/parental-L003.novoalign.rg.bam | bedtools coverage -abam - -counts -b bam/windows.5kb.bed > bam/parental-L003.novoalign.rg.bam.depths.5kb.q1.txt" | $QSUB
export IRRHOME=/net/midtier18/vol79/cphg-quinlan2/projects/irradiated-clones
export STEPNAME=irrsort
export QSUB="qsub -W group_list=cphg_arq5x -q arq5xlab -V -l select=1:mem=6000m:ncpus=8 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/3Gy-L005.novoalign.rg.bam bam/3Gy-L005.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/6Gy_1-L006.novoalign.rg.bam bam/6Gy_1-L006.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/6Gy_2-L007.novoalign.rg.bam bam/6Gy_2-L007.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/6Gy_3-L008.novoalign.rg.bam bam/6Gy_3-L008.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/mock-L004.novoalign.rg.bam bam/mock-L004.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/parental-L001.novoalign.rg.bam bam/parental-L001.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/parental-L002.novoalign.rg.bam bam/parental-L002.novoalign.rg.sorted.bam" | $QSUB
echo "cd $IRRHOME; novosort -c 8 -t ./ -m 4G bam/parental-L003.novoalign.rg.bam bam/parental-L003.novoalign.rg.sorted.bam" | $QSUB
python windowed_coverage.py -c data/3Gy-L005.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/6Gy_1-L006.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/6Gy_2-L007.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/6Gy_3-L008.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/mock-L004.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/parental-L001.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/parental-L002.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python windowed_coverage.py -c data/parental-L003.novoalign.rg.bam.depths.5kb.txt -f ~/data/hg19.fa
python log_ratio.py -1 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/p1p2.bedg
python log_ratio.py -1 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/p1p3.bedg
python log_ratio.py -1 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/p2p3.bedg
python log_ratio.py -1 data/mock-L004.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/m1p1.bedg
python log_ratio.py -1 data/mock-L004.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/m1p2.bedg
python log_ratio.py -1 data/mock-L004.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/m1p3.bedg
python log_ratio.py -1 data/3Gy-L005.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/3Gp1.bedg
python log_ratio.py -1 data/3Gy-L005.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/3Gp2.bedg
python log_ratio.py -1 data/3Gy-L005.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/3Gp3.bedg
python log_ratio.py -1 data/6Gy_1-L006.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G1p1.bedg
python log_ratio.py -1 data/6Gy_1-L006.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G1p2.bedg
python log_ratio.py -1 data/6Gy_1-L006.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G1p3.bedg
python log_ratio.py -1 data/6Gy_2-L007.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G2p1.bedg
python log_ratio.py -1 data/6Gy_2-L007.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G2p2.bedg
python log_ratio.py -1 data/6Gy_2-L007.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G2p3.bedg
python log_ratio.py -1 data/6Gy_3-L008.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L001.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G3p1.bedg
python log_ratio.py -1 data/6Gy_3-L008.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L002.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G3p2.bedg
python log_ratio.py -1 data/6Gy_3-L008.novoalign.rg.bam.depths.5kb.txt.depth.txt -2 data/parental-L003.novoalign.rg.bam.depths.5kb.txt.depth.txt | grep -v "_" > data/6G3p3.bedg
setwd("/Users/arq5x/src/quinlan/codachrom/")
library(DNAcopy)
d <- read.table('data/p1p2.bedg')
CNA.object <- CNA( genomdat = d[,4], chrom = d[,1], maploc = d[,2]+(d[,3]-d[,2])/2, data.type = 'logratio', sampleid="p1p2")
CNA.smoothed <- smooth.CNA(CNA.object)
segs <- segment(CNA.smoothed, undo.splits = "sdundo", undo.SD = 1.5, verbose = 1, min.width=5)
pdf('figures/p1p2.pdf', height=10, width=10)
plot(segs, plot.type="s", ylim=c(-5,5))
dev.off()
setwd("/Users/arq5x/src/quinlan/codachrom/")
library(DNAcopy)
d <- read.table('data/3Gp2.bedg')
CNA.object <- CNA( genomdat = d[,4], chrom = d[,1], maploc = d[,2]+(d[,3]-d[,2])/2, data.type = 'logratio', sampleid="3Gp2")
segs <- segment(CNA.object, undo.splits = "sdundo", undo.SD = 1.5, verbose = 1, min.width=5)
pdf('figures/3Gp2.pdf', height=10, width=10)
plot(segs, plot.type="s", ylim=c(-5,5))
dev.off()
setwd("/Users/arq5x/src/quinlan/codachrom/")
library(DNAcopy)
d <- read.table('data/6G1p2.bedg')
CNA.object <- CNA( genomdat = d[,4], chrom = d[,1], maploc = d[,2]+(d[,3]-d[,2])/2, data.type = 'logratio', sampleid="6G1p2")
segs <- segment(CNA.object, undo.splits = "sdundo", undo.SD = 1.5, verbose = 1, min.width=5)
pdf('figures/6G1p2.pdf', height=10, width=10)
plot(segs, plot.type="s", ylim=c(-5,5))
dev.off()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment