Last active
November 11, 2020 15:16
-
-
Save cjw85/23d2b0675ec5a5c7fd4074456524c971 to your computer and use it in GitHub Desktop.
Run DeepVariant on a VCF produced by medaka
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash | |
set -eo pipefail | |
THREADS=8 | |
usage=" | |
DeepVariant genotyping of medaka variants. | |
First run: | |
medaka_variant -l -P 0 ... | |
before running this command. | |
$(basename "$0") [-h] -b <read.bam> -r <reference.bam> -v <medaka.vcf> -o <output> [-t threads] | |
-h show this help text. | |
-b medaka bam: round_0_hap_mixed_phased.bam. | |
-r fasta input reference. | |
-v medaka VCF: round_1.vcf. | |
-q use high quality model. | |
-o output directory. | |
-t number of threads with which to create features (default: ${THREADS})." | |
bflag=false | |
rflag=false | |
vflag=false | |
qflag=false | |
dflag=false | |
oflag=false | |
while getopts ":hb:r:t:v:o:d" opt | |
do | |
case "$opt" in | |
h ) echo "$usage" >&2; exit;; | |
b ) bflag=true; BAM="${OPTARG}";; | |
r ) rflag=true; REFERENCE="${OPTARG}";; | |
v ) vflag=true; MEDAKA_VCF="${OPTARG}";; | |
q ) qflag=true;; | |
o ) oflag=true; OUTPUT_DIR="${OPTARG}";; | |
t ) THREADS="${OPTARG}";; | |
d ) dflag=true;; | |
\? ) echo "Invalid option: -${OPTARG}." >&2; exit 1;; | |
: ) echo "Option -$OPTARG requires an argument." >&2; exit 1;; | |
esac | |
done | |
if ! $bflag || ! $rflag || ! $vflag || ! $oflag; then | |
echo "$usage" >&2; | |
echo "" >&2; | |
echo "-b, -r, -v, and -o must be specified." >&2; | |
exit 1; | |
fi | |
if ! $dflag; then | |
# copy everything to current working directory | |
echo "Copying files to ${OUTPUT_DIR}" | |
mkdir -p ${OUTPUT_DIR} | |
cp ${BAM} ${OUTPUT_DIR} | |
cp ${BAM}.bai ${OUTPUT_DIR} | |
cp ${REFERENCE} ${OUTPUT_DIR} | |
cp ${REFERENCE}.fai ${OUTPUT_DIR} | |
cp ${MEDAKA_VCF} ${OUTPUT_DIR} | |
cp $0 ${OUTPUT_DIR} | |
cd ${OUTPUT_DIR} | |
# run ourselves again with -d in docker | |
echo "Rerunning this script inside docker container" | |
image_tag=ce5edb282a7ef02dfa85f50b180fe6156241f4e1eaf1108f653708df16ccf6b0 | |
QARG="" | |
if $qflag; then qarg="-q"; fi | |
docker run \ | |
-v ${PWD}:/data \ | |
--user $(id -u):$(id -g) \ | |
--gpus all \ | |
kishwars/pepper_deepvariant_gpu@sha256:${image_tag} \ | |
/data/$(basename $0) \ | |
-b /data/$(basename ${BAM}) -r /data/$(basename ${REFERENCE}) \ | |
-v /data/$(basename ${MEDAKA_VCF}) -o . -t ${THREADS} ${QARG}\ | |
-d \ | |
else | |
echo "Running within docker." | |
cd /data | |
OUTPUT_DIR=. | |
MEDAKA_INPUT=${OUTPUT_DIR}/medaka.vcf.gz | |
grep -v lowqual ${MEDAKA_VCF} | bgzip -c > ${MEDAKA_INPUT} | |
tabix -p vcf ${MEDAKA_INPUT} | |
DV_INTER=${OUTPUT_DIR}/dv_intermediate_outputs/ | |
DV_MODEL=/opt/dv_models/ont-3bams-withHP/model.ckpt-33000 | |
DV_OUTPUT=${OUTPUT_DIR}/deepvariant.vcf.gz | |
EXTRA_ARGS="parse_sam_aux_fields=true,sort_by_haplotypes=true,realign_reads=false,variant_caller=vcf_candidate_importer,proposed_variants=${MEDAKA_INPUT}" | |
MODEL="/opt/dv_models/R941_guppy360_hp_v0_basic/model.ckpt-25600" | |
if $qflag; then | |
EXTRA_ARGS="alt_aligned_pileup=diff_channels,${EXTRA_ARGS}" | |
MODEL="/opt/dv_models/ont-3bams-withHP/model.ckpt-33000" | |
fi | |
mkdir -p ${DV_INTER} | |
/opt/deepvariant/bin/run_deepvariant \ | |
--model_type=WGS \ | |
--customized_model=${MODEL} \ | |
--ref=${REFERENCE} \ | |
--reads=${BAM} \ | |
--output_vcf=${DV_OUTPUT} \ | |
--intermediate_results_dir=${DV_INTER} \ | |
--num_shards=${THREADS} \ | |
--make_examples_extra_args="${EXTRA_ARGS}" | |
rm -rf ${DV_INTER} | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment