Skip to content

Instantly share code, notes, and snippets.

@indraniel
Last active November 14, 2018 00:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save indraniel/4584b6c67111ca5e2e5d13eeb80ca1ad to your computer and use it in GitHub Desktop.
Save indraniel/4584b6c67111ca5e2e5d13eeb80ca1ad to your computer and use it in GitHub Desktop.
Example of the internals of a Cromwell script (taken from the GenotypeGVCFs task of the GATK4 pipeline)
#!/bin/bash
# Preamble of the generated task script: enter the execution root and set up
# a scratch directory that temporary files are directed to.
cd /cromwell_root

# tmpDir is left empty when the scratch directory cannot be created.
tmpDir=""
if mkdir -p "/cromwell_root/tmp.b3852e07"; then
  tmpDir="/cromwell_root/tmp.b3852e07"
fi
chmod 777 "$tmpDir"

# Point both TMPDIR and the JVM's java.io.tmpdir at the scratch directory.
export TMPDIR="$tmpDir"
export _JAVA_OPTIONS="-Djava.io.tmpdir=$tmpDir"
export HOME="$HOME"
# Start the monitoring script in the background, sending its stdout to
# monitoring.log. The subshell keeps the directory change local, and the
# script is never waited on here.
(
  cd /cromwell_root
  chmod u+x /cromwell_root/monitoring.sh
  # Create the log file up front so it exists even before any output arrives.
  touch /cromwell_root/monitoring.log
  /cromwell_root/monitoring.sh >/cromwell_root/monitoring.log &
)
# Run the task's command section in a subshell, with stdout and stderr
# captured to files under /cromwell_root, then record the subshell's exit
# status in rc.tmp.
(
  cd /cromwell_root
  # From here on, stop the command section at the first failing step.
  set -e

  # GenomicsDB archive; judging by its path it comes from the ImportGVCFs call.
  workspace_tar=/cromwell_root/wustl-ccdg-costa-rican-callset-2018-11/cromwell/cromwell-executions/JointGenotyping/94b704be-7057-4eda-9622-28cd51749cfd/call-ImportGVCFs/shard-8204/genomicsdb.tar

  # Unpack into the current directory. The workspace name is the archive's
  # basename without ".tar" (assumes the archive unpacks to a directory of
  # that name -- confirm against the task that builds it).
  tar -xf "$workspace_tar"
  WORKSPACE=$(basename "$workspace_tar" .tar)

  # The gendb:// URI is quoted so the expansion is never word-split or globbed.
  /gatk/gatk --java-options "-Xmx5g -Xms5g" \
    GenotypeGVCFs \
    -R /cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta \
    -O output.vcf.gz \
    -D gs://broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf \
    -G StandardAnnotation \
    --only-output-calls-starting-in-intervals \
    --use-new-qual-calculator \
    -V "gendb://$WORKSPACE" \
    -L chr16:33757666-34097266
) > '/cromwell_root/stdout' 2> '/cromwell_root/stderr'
# No command may be inserted between the subshell and this line: $? must
# still hold the subshell's exit status.
echo $? > /cromwell_root/rc.tmp
# Add a ".file" placeholder to every empty directory to facilitate directory
# delocalization on the cloud.
(
  cd /cromwell_root
  # NUL-delimited hand-off so unusual directory names are passed intact.
  find . -type d -empty -print0 \
    | xargs -0 -I '{}' touch '{}/.file'
)
# Flush pending writes to disk, then publish the return code by renaming
# rc.tmp to rc; the rename happens only after the sync has run.
( cd /cromwell_root; sync )
mv /cromwell_root/rc.tmp /cromwell_root/rc
# Runs GATK GenotypeGVCFs over one interval of a GenomicsDB workspace.
#
# Inputs:
#   workspace_tar       - tar archive that is unpacked in the working directory;
#                         its basename without ".tar" is used as the gendb://
#                         workspace name.
#   interval            - interval string passed to -L.
#   output_vcf_filename - output file name passed to -O; a ".tbi" index with the
#                         same prefix is expected as a second output.
#   gatk_path           - path of the gatk launcher to execute.
#   ref_fasta           - reference FASTA passed to -R.
#   ref_fasta_index,
#   ref_dict            - declared as File inputs but not referenced in the
#                         command; presumably present so they are localized
#                         alongside ref_fasta -- confirm.
#   dbsnp_vcf           - value passed to -D; declared as String, so it is
#                         handed to GATK as-is rather than as a File input.
#   docker, disk_size   - container image and local disk size used in runtime.
task GenotypeGVCFs {
  File workspace_tar
  String interval

  String output_vcf_filename

  String gatk_path

  File ref_fasta
  File ref_fasta_index
  File ref_dict

  String dbsnp_vcf
  String docker
  Int disk_size

  command <<<
    set -e

    tar -xf "${workspace_tar}"
    WORKSPACE=$( basename "${workspace_tar}" .tar)

    ${gatk_path} --java-options "-Xmx5g -Xms5g" \
      GenotypeGVCFs \
      -R ${ref_fasta} \
      -O ${output_vcf_filename} \
      -D ${dbsnp_vcf} \
      -G StandardAnnotation \
      --only-output-calls-starting-in-intervals \
      --use-new-qual-calculator \
      -V "gendb://$WORKSPACE" \
      -L ${interval}
  >>>

  # The JVM heap is fixed at 5g in the command above; 7 GB is requested here.
  runtime {
    docker: docker
    memory: "7 GB"
    cpu: "2"
    disks: "local-disk " + disk_size + " HDD"
    preemptible: 5
  }

  output {
    File output_vcf = "${output_vcf_filename}"
    File output_vcf_index = "${output_vcf_filename}.tbi"
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment