Created
October 21, 2019 15:14
-
-
Save ruchim/71fe74e0241b4e4a052e8ccecf6a0575 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## - Reference genome must be Hg38 with ALT contigs
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
# Local import
# Each sub-workflow / task file is imported under a short namespace alias that the
# workflow below uses in its `call` statements. The structs file is imported without
# an alias so the struct types it declares can be referenced directly.
import "../../../../pipelines/dna_seq/UnmappedBamToAlignedBam.wdl" as ToBam
import "../../../../tasks/AggregatedBamQC.wdl" as AggregatedQC
import "../../../../tasks/GermlineVariantDiscovery.wdl" as Calling
import "../../../../tasks/Qc.wdl" as QC
import "../../../../tasks/Utilities.wdl" as Utils
import "../../../../tasks/BamToCram.wdl" as ToCram
import "../../../../tasks/VariantCalling.wdl" as ToGvcf
import "../../../../structs/dna_seq/germline/GermlineStructs.wdl"

# Git URL import
# Alternative to the local imports above: uncomment these (and comment out the local
# ones) to resolve the same aliases from URLs instead of relative paths.
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/UnmappedBamToAlignedBam.wdl" as ToBam
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/AggregatedBamQC.wdl" as AggregatedQC
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/GermlineVariantDiscovery.wdl" as Calling
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/Qc.wdl" as QC
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/Utilities.wdl" as Utils
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/BamToCram.wdl" as ToCram
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/VariantCalling.wdl" as ToGvcf
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/structs/GermlineStructs.wdl"
# WORKFLOW DEFINITION
# WholeGenomeGermlineSingleSample
#
# Top-level workflow that wires together the imported sub-workflows/tasks:
#   1. ToBam.UnmappedBamToAlignedBam   - produces the recalibrated BAM, its index,
#                                        per-read-group metrics and a contamination value.
#   2. AggregatedQC.AggregatedBamQC    - aggregated / read-group QC metrics on that BAM.
#   3. ToCram.BamToCram                - CRAM, CRAM index, md5 and validation report.
#   4. QC.CollectWgsMetrics /
#      QC.CollectRawWgsMetrics         - WGS coverage metrics over wgs_coverage_interval_list.
#   5. ToGvcf.VariantCalling           - called variants plus summary/detail metrics.
#
# Inputs:
#   sample_and_unmapped_bams    - struct; this workflow reads its base_file_name,
#                                 sample_name and final_gvcf_base_name members.
#   references                  - struct; this workflow reads its reference_fasta,
#                                 interval-list, scatter, and dbSNP members.
#   papi_settings               - struct; this workflow reads agg_preemptible_tries.
#   wgs_coverage_interval_list  - intervals passed to both WGS-metrics calls.
#   haplotype_database_file     - optional; forwarded to alignment and aggregated QC.
#   provide_bam_output          - when true, the BAM and its index are also exposed as
#                                 outputs (default false).
#   use_gatk3_haplotype_caller  - forwarded unchanged to VariantCalling (default true).
#
# NOTE(review): the struct types are presumably declared in the imported
# GermlineStructs.wdl; their full field lists are not visible in this file.
workflow WholeGenomeGermlineSingleSample {
  input {
    SampleAndUnmappedBams sample_and_unmapped_bams
    GermlineSingleSampleReferences references
    PapiSettings papi_settings
    File wgs_coverage_interval_list

    File? haplotype_database_file
    Boolean provide_bam_output = false
    Boolean use_gatk3_haplotype_caller = true
  }

  # Not overridable:
  # These are declared outside the input block, so they are fixed by this workflow.
  # read_length feeds both WGS-metrics calls; lod_threshold and
  # cross_check_fingerprints_by are forwarded to UnmappedBamToAlignedBam.
  Int read_length = 250
  Float lod_threshold = -20.0
  String cross_check_fingerprints_by = "READGROUP"
  # Shared basename handed to both the alignment sub-workflow and aggregated QC.
  String recalibrated_bam_basename = sample_and_unmapped_bams.base_file_name + ".aligned.duplicates_marked.recalibrated"

  # Unmapped BAM(s) -> aligned BAM; every downstream call consumes its output_bam.
  call ToBam.UnmappedBamToAlignedBam {
    input:
      sample_and_unmapped_bams    = sample_and_unmapped_bams,
      references                  = references,
      papi_settings               = papi_settings,

      cross_check_fingerprints_by = cross_check_fingerprints_by,
      haplotype_database_file     = haplotype_database_file,
      lod_threshold               = lod_threshold,
      recalibrated_bam_basename   = recalibrated_bam_basename
  }

  # Aggregated and read-group level QC on the BAM produced above.
  call AggregatedQC.AggregatedBamQC {
    input:
      base_recalibrated_bam = UnmappedBamToAlignedBam.output_bam,
      base_recalibrated_bam_index = UnmappedBamToAlignedBam.output_bam_index,
      base_name = sample_and_unmapped_bams.base_file_name,
      sample_name = sample_and_unmapped_bams.sample_name,
      recalibrated_bam_base_name = recalibrated_bam_basename,
      haplotype_database_file = haplotype_database_file,
      references = references,
      papi_settings = papi_settings
  }

  # Convert the BAM to CRAM. Note that the chimerism_metrics input is fed the
  # aggregated alignment summary metrics file from AggregatedBamQC.
  call ToCram.BamToCram as BamToCram {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      ref_dict = references.reference_fasta.ref_dict,
      duplication_metrics = UnmappedBamToAlignedBam.duplicate_metrics,
      chimerism_metrics = AggregatedBamQC.agg_alignment_summary_metrics,
      base_file_name = sample_and_unmapped_bams.base_file_name,
      agg_preemptible_tries = papi_settings.agg_preemptible_tries
  }

  # QC the sample WGS metrics (stringent thresholds)
  call QC.CollectWgsMetrics as CollectWgsMetrics {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      input_bam_index = UnmappedBamToAlignedBam.output_bam_index,
      metrics_filename = sample_and_unmapped_bams.base_file_name + ".wgs_metrics",
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      wgs_coverage_interval_list = wgs_coverage_interval_list,
      read_length = read_length,
      preemptible_tries = papi_settings.agg_preemptible_tries
  }

  # QC the sample raw WGS metrics (common thresholds)
  call QC.CollectRawWgsMetrics as CollectRawWgsMetrics {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      input_bam_index = UnmappedBamToAlignedBam.output_bam_index,
      metrics_filename = sample_and_unmapped_bams.base_file_name + ".raw_wgs_metrics",
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      wgs_coverage_interval_list = wgs_coverage_interval_list,
      read_length = read_length,
      preemptible_tries = papi_settings.agg_preemptible_tries
  }

  # Variant calling on the aligned BAM; the contamination value computed by the
  # alignment sub-workflow is passed through.
  call ToGvcf.VariantCalling as BamToGvcf {
    input:
      calling_interval_list = references.calling_interval_list,
      evaluation_interval_list = references.evaluation_interval_list,
      haplotype_scatter_count = references.haplotype_scatter_count,
      break_bands_at_multiples_of = references.break_bands_at_multiples_of,
      contamination = UnmappedBamToAlignedBam.contamination,
      input_bam = UnmappedBamToAlignedBam.output_bam,
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      ref_dict = references.reference_fasta.ref_dict,
      dbsnp_vcf = references.dbsnp_vcf,
      dbsnp_vcf_index = references.dbsnp_vcf_index,
      base_file_name = sample_and_unmapped_bams.base_file_name,
      final_vcf_base_name = sample_and_unmapped_bams.final_gvcf_base_name,
      agg_preemptible_tries = papi_settings.agg_preemptible_tries,
      use_gatk3_haplotype_caller = use_gatk3_haplotype_caller
  }

  # Declarations made inside a conditional are optional outside it, so these two
  # values are only defined when provide_bam_output is true. They back the
  # File? output_bam / output_bam_index outputs below.
  if (provide_bam_output) {
    File provided_output_bam = UnmappedBamToAlignedBam.output_bam
    File provided_output_bam_index = UnmappedBamToAlignedBam.output_bam_index
  }

  # Outputs that will be retained when execution is complete
  output {
    # Per-read-group metrics from the alignment sub-workflow
    Array[File] quality_yield_metrics = UnmappedBamToAlignedBam.quality_yield_metrics

    Array[File] unsorted_read_group_base_distribution_by_cycle_pdf = UnmappedBamToAlignedBam.unsorted_read_group_base_distribution_by_cycle_pdf
    Array[File] unsorted_read_group_base_distribution_by_cycle_metrics = UnmappedBamToAlignedBam.unsorted_read_group_base_distribution_by_cycle_metrics
    Array[File] unsorted_read_group_insert_size_histogram_pdf = UnmappedBamToAlignedBam.unsorted_read_group_insert_size_histogram_pdf
    Array[File] unsorted_read_group_insert_size_metrics = UnmappedBamToAlignedBam.unsorted_read_group_insert_size_metrics
    Array[File] unsorted_read_group_quality_by_cycle_pdf = UnmappedBamToAlignedBam.unsorted_read_group_quality_by_cycle_pdf
    Array[File] unsorted_read_group_quality_by_cycle_metrics = UnmappedBamToAlignedBam.unsorted_read_group_quality_by_cycle_metrics
    Array[File] unsorted_read_group_quality_distribution_pdf = UnmappedBamToAlignedBam.unsorted_read_group_quality_distribution_pdf
    Array[File] unsorted_read_group_quality_distribution_metrics = UnmappedBamToAlignedBam.unsorted_read_group_quality_distribution_metrics

    # Read-group level metrics from AggregatedBamQC
    File read_group_alignment_summary_metrics = AggregatedBamQC.read_group_alignment_summary_metrics
    File read_group_gc_bias_detail_metrics = AggregatedBamQC.read_group_gc_bias_detail_metrics
    File read_group_gc_bias_pdf = AggregatedBamQC.read_group_gc_bias_pdf
    File read_group_gc_bias_summary_metrics = AggregatedBamQC.read_group_gc_bias_summary_metrics

    File? cross_check_fingerprints_metrics = UnmappedBamToAlignedBam.cross_check_fingerprints_metrics

    File selfSM = UnmappedBamToAlignedBam.selfSM
    Float contamination = UnmappedBamToAlignedBam.contamination

    File calculate_read_group_checksum_md5 = AggregatedBamQC.calculate_read_group_checksum_md5

    # Aggregated metrics from AggregatedBamQC
    File agg_alignment_summary_metrics = AggregatedBamQC.agg_alignment_summary_metrics
    File agg_bait_bias_detail_metrics = AggregatedBamQC.agg_bait_bias_detail_metrics
    File agg_bait_bias_summary_metrics = AggregatedBamQC.agg_bait_bias_summary_metrics
    File agg_gc_bias_detail_metrics = AggregatedBamQC.agg_gc_bias_detail_metrics
    File agg_gc_bias_pdf = AggregatedBamQC.agg_gc_bias_pdf
    File agg_gc_bias_summary_metrics = AggregatedBamQC.agg_gc_bias_summary_metrics
    File agg_insert_size_histogram_pdf = AggregatedBamQC.agg_insert_size_histogram_pdf
    File agg_insert_size_metrics = AggregatedBamQC.agg_insert_size_metrics
    File agg_pre_adapter_detail_metrics = AggregatedBamQC.agg_pre_adapter_detail_metrics
    File agg_pre_adapter_summary_metrics = AggregatedBamQC.agg_pre_adapter_summary_metrics
    File agg_quality_distribution_pdf = AggregatedBamQC.agg_quality_distribution_pdf
    File agg_quality_distribution_metrics = AggregatedBamQC.agg_quality_distribution_metrics
    File agg_error_summary_metrics = AggregatedBamQC.agg_error_summary_metrics

    # Optional: declared File? because AggregatedBamQC may not produce them
    File? fingerprint_summary_metrics = AggregatedBamQC.fingerprint_summary_metrics
    File? fingerprint_detail_metrics = AggregatedBamQC.fingerprint_detail_metrics

    File wgs_metrics = CollectWgsMetrics.metrics
    File raw_wgs_metrics = CollectRawWgsMetrics.metrics

    File duplicate_metrics = UnmappedBamToAlignedBam.duplicate_metrics
    File output_bqsr_reports = UnmappedBamToAlignedBam.output_bqsr_reports

    File gvcf_summary_metrics = BamToGvcf.vcf_summary_metrics
    File gvcf_detail_metrics = BamToGvcf.vcf_detail_metrics

    # Only defined when provide_bam_output is true (see the conditional above)
    File? output_bam = provided_output_bam
    File? output_bam_index = provided_output_bam_index

    File output_cram = BamToCram.output_cram
    File output_cram_index = BamToCram.output_cram_index
    File output_cram_md5 = BamToCram.output_cram_md5

    File validate_cram_file_report = BamToCram.validate_cram_file_report

    File output_vcf = BamToGvcf.output_vcf
    File output_vcf_index = BamToGvcf.output_vcf_index
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment