Skip to content

Instantly share code, notes, and snippets.

@arq5x
Last active September 24, 2015 22:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save arq5x/821179 to your computer and use it in GitHub Desktop.
Save arq5x/821179 to your computer and use it in GitHub Desktop.
T1D Loci Exome for all 7 pools on the PBS environment
############################################################
# Pair the alignments.
# Keep proper, on-target (i.e. +/- 500 bp of a probe) pairs.
# Require mapping quality >= 20
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-bwa-par
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
export TARGETS=bed/probes.merged.slop1000.hg19.bed
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=3 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR;
bwa sampe -r '@RG\tID:$pool\tSM:$pool' $GENOME bam-hg19/$pool.1.sai bam-hg19/$pool.2.sai fastq/$pool.1.fq fastq/$pool.2.fq \
| samtools view -q 20 -f 2 -Su - \
| intersectBed -abam stdin -b $TARGETS \
> bam-hg19/$pool.conc.on.bam" | $QSUB
done
############################################################
# Sort and index the alignments.
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-srtidx
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR; \
samtools sort -m 2000000000 bam-hg19/$pool.conc.on.bam bam-hg19/$pool.conc.on.pos; \
samtools index bam-hg19/$pool.conc.on.pos.bam" | $QSUB
done
############################################################
# Identify realignment targets.
# NEED TO MAKE A GATK SORTED REF GENOME
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-targets
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`;
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=3 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $DIR; java -Xmx2g \
-jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \
-T RealignerTargetCreator \
-R $GENOME \
-I bam-hg19/$pool.conc.on.pos.bam \
-o bam-hg19/$pool.conc.on.pos.bam.intervals" | $QSUB;
done
#############################################################
# Call variants with freebayes: INDEL-REALIGNED
#############################################################
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
export STEPNAME=t1d-freebayes
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
export VERSION=2013-01-18
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
echo "cd $DIR; freebayes -f $GENOME \
-b bam-hg19/pool1-63.conc.on.pos.realigned.bam \
-b bam-hg19/pool2-43.conc.on.pos.realigned.bam \
-b bam-hg19/pool2-63.conc.on.pos.realigned.bam \
-b bam-hg19/pool3-63.conc.on.pos.realigned.bam \
-b bam-hg19/pool4-63.conc.on.pos.realigned.bam \
-b bam-hg19/pool5-63.conc.on.pos.realigned.bam \
-b bam-hg19/pool6-63.conc.on.pos.realigned.bam \
-b bam-hg19/pool7-63.conc.on.pos.realigned.bam \
-p 20 \
-J \
--min-coverage 100 \
--min-base-quality 10 \
--min-alternate-total 5 \
--no-marginals \
-i \
> varCalling/$VERSION/raw.freebayes.rg.realigned.vcf" | $QSUB
# Where?
/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
##########################################################################################
# Create symlinks to the original data and rename the lanes by the pools-length sequenced
##########################################################################################
ln -s ../orig_data/pool1-63/s_7_1_sequence.txt pool1-63.1.fq
ln -s ../orig_data/pool1-63/s_7_2_sequence.txt pool1-63.2.fq
ln -s ../orig_data/pool2-43/s_5_1_sequence.txt pool2-43.1.fq
ln -s ../orig_data/pool2-43/s_5_2_sequence.txt pool2-43.2.fq
ln -s ../orig_data/pool2-63/s_8_1_sequence.txt pool2-63.1.fq
ln -s ../orig_data/pool2-63/s_8_2_sequence.txt pool2-63.2.fq
ln -s ../orig_data/pool3-63/s_3_1_sequence.txt pool3-63.1.fq
ln -s ../orig_data/pool3-63/s_3_2_sequence.txt pool3-63.2.fq
ln -s ../orig_data/pool4-63/s_4_1_sequence.txt pool4-63.1.fq
ln -s ../orig_data/pool4-63/s_4_2_sequence.txt pool4-63.2.fq
ln -s ../orig_data/pool5-63/s_5_1_sequence.txt pool5-63.1.fq
ln -s ../orig_data/pool5-63/s_5_2_sequence.txt pool5-63.2.fq
ln -s ../orig_data/pool6-63/s_6_1_sequence.txt pool6-63.1.fq
ln -s ../orig_data/pool6-63/s_6_2_sequence.txt pool6-63.2.fq
ln -s ../orig_data/pool7-63/s_7_1_sequence.txt pool7-63.1.fq
ln -s ../orig_data/pool7-63/s_7_2_sequence.txt pool7-63.2.fq
##########################################################################################
# Align the pools to hg18 with BWA
##########################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-bwa-aln
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=4 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.1.fq > bam/$pool.1.sai"
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.2.fq > bam/$pool.2.sai"
done
##########################################################################################
# Align the pools to hg19 with BWA
##########################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-bwa-aln
# KATIE, FIX HG18 to HG19 (/home/arq5x/cphg-home/shared/genomes/hg19/bwa/gatk/hg19_gatk.fa)
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=4 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.1.fq > bam-hg19/$pool.1.sai"
echo "cd $DIR; bwa aln -t 4 $GENOME fastq/$pool.2.fq > bam-hg19/$pool.2.sai"
done
############################################################
# Pair the alignments.
# Keep proper, on-target (i.e. +/- 500 bp of a probe) pairs.
# Require mapping quality >= 20
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-bwa-par
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
export TARGETS=bed/probes.merged.slop1000.bed
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=3 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR;
bwa sampe -r '@RG\tID:$pool\tSM:$pool' $GENOME bam/$pool.1.sai bam/$pool.2.sai fastq/$pool.1.fq fastq/$pool.2.fq \
| samtools view -q 20 -f 2 -Su - \
| intersectBed -abam stdin -b $TARGETS \
> bam/$pool.conc.on.bam" | $QSUB
done
############################################################
# Sort and index the alignments.
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-srtidx
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR; \
samtools sort -m 2000000000 bam/$pool.conc.on.bam bam/$pool.conc.on.pos; \
samtools index bam/$pool.conc.on.pos.bam" | $QSUB
done
############################################################
# Identify realignment targets.
# NEED TO MAKE A GATK SORTED REF GENOME
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-targets
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`;
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=3 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $DIR; java -Xmx10g \
-jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \
-T RealignerTargetCreator \
-R $GENOME \
-I bam/$pool.conc.on.pos.bam \
-o bam/$pool.conc.on.pos.bam.intervals" | $QSUB;
done
############################################################
# Use GATK to realign the BAMs at the suspicious targets.
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-targets
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`;
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=4 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $DIR; java -Xmx12g -Djava.io.tmpdir=$DIR/bam -jar /home/arq5x/cphg-home/bin/GenomeAnalysisTK.jar \
-T IndelRealigner \
--num_threads 1 \
-R $GENOME \
-targetIntervals bam/$pool.conc.on.pos.bam.intervals \
-I bam/$pool.conc.on.pos.bam \
-o bam/$pool.conc.on.pos.realigned.bam" | $QSUB;
done
############################################################
# Use Picard to mark duplicates.
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-targets
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/gatk/hg18_sorted.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`;
do
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=8000m:ncpus=2 -N $STEPNAME -m bea -M arq5x@virginia.edu";
echo "cd $DIR; java -Xmx4g -jar /home/arq5x/cphg-home/bin/MarkDuplicates.jar \
INPUT=bam/$pool.conc.on.pos.realigned.bam \
OUTPUT=bam/$pool.conc.on.pos.realigned.dedup.bam \
TMP_DIR=$pool/bam \
METRICS_FILE=bam/$pool.markdup_metrics" | $QSUB;
done
############################################################
# Index the realigned and deduped BAMs.
############################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
export STEPNAME=t1d-ex-srtidx
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
for pool in `echo $POOLS`
do
# -m ea sends an email when job (e)nds or (a)borts
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=4000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR; samtools index bam/$pool.conc.on.pos.realigned.dedup.bam" | $QSUB
done
#############################################################
# Call variants with freebayes: INDEL-REALIGNED
#############################################################
export GENOME=/home/arq5x/cphg-home/shared/genomes/hg18/bwa/hg18_full.fa
export POOLS="pool1-63 pool2-43 pool2-63 pool3-63 pool4-63 pool5-63 pool6-63 pool7-63"
export STEPNAME=t1d-freebayes
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=16000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
export VERSION=2011-04-04
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/
echo "cd $DIR; freebayes -f $GENOME \
-b bam/pool1-63.conc.on.pos.realigned.bam \
-b bam/pool2-43.conc.on.pos.realigned.bam \
-b bam/pool2-63.conc.on.pos.realigned.bam \
-b bam/pool3-63.conc.on.pos.realigned.bam \
-b bam/pool4-63.conc.on.pos.realigned.bam \
-b bam/pool5-63.conc.on.pos.realigned.bam \
-b bam/pool6-63.conc.on.pos.realigned.bam \
-b bam/pool7-63.conc.on.pos.realigned.bam \
-p 20 \
-J \
--min-coverage 100 \
--min-base-quality 10 \
--min-alternate-total 5 \
--no-marginals \
-i \
> varCalling/$VERSION/raw.freebayes.rg.realigned.vcf" | $QSUB
#############################################################################
# Cull the raw VCF file to those variants that are within 1000 bp of a target
#############################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
export BED=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/bed
cd $DIR; intersectBed -a raw.freebayes.rg.vcf -b $BED/probes.merged.slop1000.bed > raw.freebayes.rg.on.slop1000.vcf
##############################################################################################
# Only keep variants with a total read depth of at least 50 and at least 5 alternate alleles.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
cd $DIR; cat raw.freebayes.rg.on.slop1000.vcf | ./vcfFilter.pl -d 100 -a 10 -c freebayes > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf
##############################################################################################
# Convert VCF to annovar format and carry along the extra info.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
cd $DIR; convert2annovar.pl --format vcf4 \
--includeinfo \
raw.freebayes.rg.on.slop1000.depth100.alt10.vcf \
> raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar
##############################################################################################
# Annotate the variants by gene.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar
cd $DIR; annotate_variation.pl -buildver hg18 raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar $ANNOVAR_PATH/humandb/
##############################################################################################
# Download SIFT annotations for ANNOVAR.
# NOTE: No need to do this every time.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar
annotate_variation.pl -downdb avsift $ANNOVAR_PATH/humandb/
##############################################################################################
# Annotate the variants by SIFT.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar
export STEPNAME=anno_sift
export QSUB="qsub -q cphg -W group_list=CPHG -V -l select=1:mem=2000m:ncpus=1 -N $STEPNAME -m bea -M arq5x@virginia.edu"
echo "cd $DIR; annotate_variation.pl -sift_threshold 0 -filter -dbtype avsift \
raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar \
$ANNOVAR_PATH/humandb/" | $QSUB
##############################################################################################
# Standardize (fudge) the ANNOVAR output.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
export ANNOVAR_PATH=/home/arq5x/cphg-home/bin/annovar
cd $DIR
awk '{OFS=""; print "avsift\tfiltered", "\t", $0}' raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered.corrected
mv raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered.corrected raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered
cat raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_filtered \
raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_dropped \
> raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all
sort -k3,3 -k4,4n raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all.sorted
sort -k3,3 -k4,4n raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function.sorted
mv raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function.sorted raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function
mv raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all.sorted raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all
paste raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.hg18_avsift_all raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.variant_function | awk '{OFS="\t"; print $3,$4-1,$5,$6,$7,$8,$1,$2,$11,$12}' > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar
grep -v UNKNOWN raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.exonic_variant_function | \
awk '{OFS="\t"; print $5,$6-1,$7,$8,$9,$2,$4}' > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar.bed
##############################################################################################
# Bring in the synonymous/nonsynonymous annotations from
# the annovar "exonic_variant_function" file.
##############################################################################################
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
cd $DIR
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar.bed -wa -wb | cut -f 1-10,15,16 > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_exonic
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar.bed -v | awk '{OFS=""; print $0,"\tnone\tnone"}' > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_intronic
cat raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_exonic raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_intronic | sort -k1,1 -k2,2n > raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_all.bed
##############################################################################################
# FIX
# Add the genotypes and other information from the GATK VCF file.
##############################################################################################
# Annotate the SNPs that are already known via dbSNP
export DBSNP=/home/arq5x/cphg-home/shared/annotations/hg18/dbSNP/dbSNP.131.hg19.maphg18
export DIR=/home/arq5x/cphg-home/projects/t1d/t1d-exome-suna/varCalling
cd $DIR
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_all.bed -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf -wa -wb | cut -f 1-12,20,22-100 | ./prettyUp.pl | windowBed -w 5 -a stdin -b $DBSNP | groupBy -g 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24 -c 28 -o collapse > all.dbSnp.rg.out
# Annotate the SNPs that are novel w.r.t. dbSNP
intersectBed -a raw.freebayes.rg.on.slop1000.depth100.alt10.vcf.annovar.sift_and_annovar_all.bed -b raw.freebayes.rg.on.slop1000.depth100.alt10.vcf -wa -wb | cut -f 1-12,20,22-100 | ./prettyUp.pl | windowBed -v -w 5 -a stdin -b $DBSNP | awk '{print $0,"\tnovel"}' > all.no_dbSnp.rg.out
# Combine the known and novel into a single file
cat all.dbSnp.rg.out all.no_dbSnp.rg.out > all.rg.out
# Try to identify regions that are likely to be alignment artifacts.
mergeBed -i all.rg.out -d 6 -n | awk '$4>=3' > alignment.artifacts.bed
# Add a flag to the end of each variant that indicates whether or not we should be suspicious of the variant.
intersectBed -a all.rg.out -b alignment.artifacts.bed -c > all.rg.flagged.out
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment