Skip to content

Instantly share code, notes, and snippets.

@Nanguage
Created August 24, 2017 01:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Nanguage/7707960ed56e45be2e43e72a634e21fc to your computer and use it in GitHub Desktop.
Save Nanguage/7707960ed56e45be2e43e72a634e21fc to your computer and use it in GitHub Desktop.
call variance with picard, GATK and samtools
#!/bin/bash
# call variance with picard, GATK and samtools
reference=$1
sample=$2
# path to software
PICARD="/path/to/picard.jar" # broadinstitute-picard-2.9.2-6-gbb529af
SAMTOOLS="/path/to/samtools" # samtools 1.2
BCFTOOLS="/path/to/bcftools" # bcftools 1.4.1-34-g0932add
GATK="/path/to/GenomeAnalysisTK.jar" # The Genome Analysis Toolkit (GATK) v3.7-0-gcfedb67
bwa index ${reference}
bwa mem -t 20 ${reference} ../../cleandata/${sample}.R1.clean.fq.gz ../../cleandata/${sample}.R2.clean.fq.gz > ${sample}.sam
#Picard
java -jar $PICARD SortSam R=${reference} I=${sample}.sam O=${sample}.reorder.sam SORT_ORDER=coordinate
$SAMTOOLS view -bS ${sample}.reorder.sam -o ${sample}.reorder.bam
java -jar $PICARD SortSam INPUT=${sample}.reorder.bam OUTPUT=${sample}.sort.bam SORT_ORDER=coordinate
java -jar $PICARD AddOrReplaceReadGroups I=${sample}.sort.bam O=${sample}.sort.addhead.bam ID=${sample}ID LB=${sample}ID PL=illumina PU=${sample}PU SM=${sample}
java -jar $PICARD MarkDuplicates I=${sample}.sort.addhead.bam O=${sample}.rmdup.bam VALIDATION_STRINGENCY=LENIENT MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 REMOVE_DUPLICATES=false M=${sample}.sort.addhead.rmdup.metric
$SAMTOOLS view -bS ${sample}.sam -o ${sample}.bam
$SAMTOOLS index ${sample}.rmdup.bam
$SAMTOOLS faidx ${reference}
java -jar $PICARD CreateSequenceDictionary R=${reference} O=$(expr match ${reference} "\(.*\).fa").dict
#GATK
java -jar $GATK -R ${reference} -T RealignerTargetCreator -I ${sample}.rmdup.bam -o ${sample}.realign.intervals
java -jar $GATK -R ${reference} -T IndelRealigner -targetIntervals ${sample}.realign.intervals -I ${sample}.rmdup.bam -o ${sample}.realign.bam
java -jar $GATK -R ${reference} -T UnifiedGenotyper -I ${sample}.rmdup.bam -o ${sample}.gatk.raw.vcf -glm BOTH
$SAMTOOLS index ${sample}.realign.bam
$SAMTOOLS mpileup -DSugf ${reference} ${sample}.realign.bam > ${sample}.samtools.raw.bcf
$BCFTOOLS view ${sample}.samtools.raw.bcf > ${sample}.samtools.raw.vcf
java -jar $GATK -R ${reference} -T SelectVariants --variant ${sample}.gatk.raw.vcf --concordance ${sample}.samtools.raw.vcf -o ${sample}.concordance.raw.vcf
MEANQUAL=$(awk '{ if ($1 !~ /#/) { total += $6; count++ } } END { print total/count }' ${sample}.concordance.raw.vcf)
java -jar $GATK -R ${reference} -T VariantFiltration --filterExpression "QD < 20.0 || ReadPosRankSum < -8.0 || FS > 10.0 || QUAL < $MEANQUAL" --filterName LowQualFilter --missingValuesInExpressionsShouldEvaluateAsFailing --variant ${sample}.concordance.raw.vcf --logging_level ERROR -o ${sample}.concordance.flt.vcf
grep -v "Filter" ${sample}.concordance.flt.vcf > ${sample}.concordance.flt.knowsites.vcf
java -jar $GATK -R ${reference} -T BaseRecalibrator -I ${sample}.realign.bam -o ${sample}.recal_data_1.grp -knownSites ${sample}.concordance.flt.knowsites.vcf
java -jar $GATK -R ${reference} -T BaseRecalibrator -I ${sample}.realign.bam -BQSR ${sample}.recal_data_1.grp -o ${sample}.recal_data_2.grp -knownSites ${sample}.concordance.flt.knowsites.vcf
java -jar $GATK -R ${reference} -T PrintReads -I ${sample}.realign.bam -o ${sample}.recal.bam -BQSR ${sample}.recal_data_1.grp
java -jar $GATK -R ${reference} -T AnalyzeCovariates -before ${sample}.recal_data_1.grp -after ${sample}.recal_data_2.grp -csv ${sample}.recal.grp.csv -plots ${sample}.recal.grp.pdf
java -jar $GATK -R ${reference} -T UnifiedGenotyper -I ${sample}.recal.bam -o ${sample}.snp.raw.vcf -glm SNP
java -jar $GATK -R ${reference} -T UnifiedGenotyper -I ${sample}.recal.bam -o ${sample}.indel.raw.vcf -glm INDEL
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment