Skip to content

Instantly share code, notes, and snippets.

@ShujiaHuang
Created September 24, 2015 04:30
Show Gist options
  • Save ShujiaHuang/791599a443793c991d1b to your computer and use it in GitHub Desktop.
Save ShujiaHuang/791599a443793c991d1b to your computer and use it in GitHub Desktop.
#!/bin/bash
# If you adapt this script for your own use, you will need to set these two variables based on your environment.
# SV_DIR is the installation directory for SVToolkit - it must be an exported environment variable.
# SV_TMPDIR is a directory for writing temp files, which may be large if you have a large data set.
#export SV_DIR=`cd .. && pwd`
SV_DIR=/home/siyang/bin/software_pip/svtoolkit
SV_TMPDIR=
runDir=
bam=
#sy: bam without s signal in flag
sites=
genotypes=
#conf=/home/siyang/USER/yeweijian/project/AsmVar/Genome_STRip/test/test_genotyping/genstrip_Common_parameters.txt
conf=/home/siyang/USER/yeweijian/project/AsmVar/Genome_STRip/20140330Test/genstrip_OnlySplit_parameters.txt
REF=/project/DanishPanGenome/Public_Data/reference/human_g1k_v37_decoy.fasta
#REF=/home/siyang/AsmVar/GenomeDK/20130928_assembly_alignment/lastdb/human_g1k_v37_decoy.fasta
mask=/home/siyang/Database/Genome_STRip/ftp.broadinstitute.org/pub/svtoolkit/svmasks/human_g1k_v37.mask.100.fasta
ploidymaps=/home/siyang/Database/Genome_STRip/ftp.broadinstitute.org/pub/svtoolkit/ploidymaps/humgen_g1k_v37_ploidy.map
cnfa=/home/siyang/Database/Genome_STRip/ftp.broadinstitute.org/pub/svtoolkit/cn2masks/cn2_mask_g1k_v37.fasta
gender=/home/siyang/USER/yeweijian/project/AsmVar/Genome_STRip/sh/realgender.map
# These executables must be on your path.
which java > /dev/null || exit 1
which Rscript > /dev/null || exit 1
which samtools > /dev/null || exit 1
# For SVAltAlign, you must use the version of bwa compatible with Genome STRiP.
export PATH=${SV_DIR}/bwa:${PATH}
export LD_LIBRARY_PATH=${SV_DIR}/bwa:${LD_LIBRARY_PATH}
mx="-Xmx60g"
classpath="${SV_DIR}/lib/SVToolkit.jar:${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar:${SV_DIR}/lib/gatk/Queue.jar"
mkdir -p ${runDir}/logs || exit 1
mkdir -p ${runDir}/metadata || exit 1
# Display version information.
java -cp ${classpath} ${mx} -jar ${SV_DIR}/lib/SVToolkit.jar
# Run preprocessing.
# For large scale use, you should use -reduceInsertSizeDistributions, but this is too slow for the installation test.
# The method employed by -computeGCProfiles requires a CN2 copy number mask and is currently only supported for human genomes.
:<<BLOCK
java -cp ${classpath} ${mx} \
org.broadinstitute.sting.queue.QCommandLine \
-S ${SV_DIR}/qscript/SVPreprocess.q \
-S ${SV_DIR}/qscript/SVQScript.q \
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
--disableJobReport \
-cp ${classpath} \
-configFile $conf \
-tempDir ${SV_TMPDIR} \
-R ${REF} \
-genomeMaskFile $mask \
-ploidyMapFile $ploidymaps \
-genderMapFile $gender \
-copyNumberMaskFile $cnfa \
-runDirectory ${runDir} \
-md ${runDir}/metadata \
-computeSizesInterval 20:1-63025520 \
-computeGCProfiles \
-jobLogDir ${runDir}/logs \
-I ${bam} \
-useMultiStep \
-run \
|| exit 1
# Run alt allele alignment.
java -cp ${classpath} ${mx} \
org.broadinstitute.sting.queue.QCommandLine \
-S ${SV_DIR}/qscript/SVAltAlign.q \
-S ${SV_DIR}/qscript/SVQScript.q \
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
--disableJobReport \
-cp ${classpath} \
-configFile $conf \
-tempDir ${SV_TMPDIR} \
-R ${REF} \
-genomeMaskFile $mask \
-runDirectory ${runDir} \
-md ${runDir}/metadata \
-jobLogDir ${runDir}/logs \
-vcf ${sites} \
-I ${bam} \
-O ${runDir}/Altalign.bam \
-run \
|| exit 1
BLOCK
# Run genotyping (including the alt allele alignments)
java -cp ${classpath} ${mx} \
org.broadinstitute.sting.queue.QCommandLine \
-S ${SV_DIR}/qscript/SVGenotyper.q \
-S ${SV_DIR}/qscript/SVQScript.q \
-gatk ${SV_DIR}/lib/gatk/GenomeAnalysisTK.jar \
--disableJobReport \
-cp ${classpath} \
-configFile $conf \
-tempDir ${SV_TMPDIR} \
-R ${REF} \
-genomeMaskFile $mask \
-genderMapFile $gender \
-runDirectory ${runDir} \
-md ${runDir}/metadata \
-jobLogDir ${runDir}/logs \
-I ${bam} \
-vcf ${sites} \
-O ${genotypes} \
-run \
|| exit 1
(grep -v ^##fileDate= ${genotypes} | grep -v ^##source= | grep -v ^##reference= | diff -q - benchmark/${genotypes}) \
|| { echo "Error: test results do not match benchmark data"; exit 1; }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment