Skip to content

Instantly share code, notes, and snippets.

@ruchim
Created October 21, 2019 15:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ruchim/71fe74e0241b4e4a052e8ccecf6a0575 to your computer and use it in GitHub Desktop.
Save ruchim/71fe74e0241b4e4a052e8ccecf6a0575 to your computer and use it in GitHub Desktop.
## - Reference genome must be Hg38 with ALT contigs
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
## For program versions, see docker containers.
##
## LICENSING :
## This script is released under the WDL source code license (BSD-3) (see LICENSE in
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may
## be subject to different licenses. Users are responsible for checking that they are
## authorized to run all programs before running this script. Please see the docker
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed
## licensing information pertaining to the included programs.
# Local import
import "../../../../pipelines/dna_seq/UnmappedBamToAlignedBam.wdl" as ToBam
import "../../../../tasks/AggregatedBamQC.wdl" as AggregatedQC
import "../../../../tasks/GermlineVariantDiscovery.wdl" as Calling
import "../../../../tasks/Qc.wdl" as QC
import "../../../../tasks/Utilities.wdl" as Utils
import "../../../../tasks/BamToCram.wdl" as ToCram
import "../../../../tasks/VariantCalling.wdl" as ToGvcf
import "../../../../structs/dna_seq/germline/GermlineStructs.wdl"
# Git URL import
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/UnmappedBamToAlignedBam.wdl" as ToBam
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/AggregatedBamQC.wdl" as AggregatedQC
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/GermlineVariantDiscovery.wdl" as Calling
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/Qc.wdl" as QC
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/Utilities.wdl" as Utils
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/BamToCram.wdl" as ToCram
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/VariantCalling.wdl" as ToGvcf
#import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/structs/GermlineStructs.wdl"
# WORKFLOW DEFINITION
workflow WholeGenomeGermlineSingleSample {

  # Top-level workflow: wires the imported sub-workflows/tasks together.
  # UnmappedBamToAlignedBam runs first; its output_bam is the input of every
  # other call below (aggregated QC, CRAM conversion, WGS metrics, calling).

  input {
    # Struct-typed inputs (declared in the imported GermlineStructs.wdl).
    SampleAndUnmappedBams sample_and_unmapped_bams
    GermlineSingleSampleReferences references
    PapiSettings papi_settings

    # Interval list handed to both WGS-metrics calls.
    File wgs_coverage_interval_list

    # Optional; forwarded unchanged to UnmappedBamToAlignedBam and AggregatedBamQC.
    File? haplotype_database_file

    # When true, the BAM and its index are also exposed as workflow outputs.
    Boolean provide_bam_output = false

    # Forwarded to the BamToGvcf (VariantCalling) call.
    Boolean use_gatk3_haplotype_caller = true
  }

  # Not overridable: these are declared outside the input block.
  # read_length goes to both WGS-metrics calls.
  Int read_length = 250
  # lod_threshold and cross_check_fingerprints_by go to UnmappedBamToAlignedBam.
  Float lod_threshold = -20.0
  String cross_check_fingerprints_by = "READGROUP"
  # Shared by UnmappedBamToAlignedBam and AggregatedBamQC.
  String recalibrated_bam_basename = sample_and_unmapped_bams.base_file_name + ".aligned.duplicates_marked.recalibrated"

  # Step 1: produces output_bam / output_bam_index plus contamination,
  # duplicate metrics and the other values forwarded in the output section.
  call ToBam.UnmappedBamToAlignedBam {
    input:
      sample_and_unmapped_bams = sample_and_unmapped_bams,
      recalibrated_bam_basename = recalibrated_bam_basename,
      haplotype_database_file = haplotype_database_file,
      cross_check_fingerprints_by = cross_check_fingerprints_by,
      lod_threshold = lod_threshold,
      references = references,
      papi_settings = papi_settings
  }

  # Step 2: aggregated QC over the BAM produced in step 1.
  call AggregatedQC.AggregatedBamQC {
    input:
      base_recalibrated_bam = UnmappedBamToAlignedBam.output_bam,
      base_recalibrated_bam_index = UnmappedBamToAlignedBam.output_bam_index,
      recalibrated_bam_base_name = recalibrated_bam_basename,
      base_name = sample_and_unmapped_bams.base_file_name,
      sample_name = sample_and_unmapped_bams.sample_name,
      haplotype_database_file = haplotype_database_file,
      references = references,
      papi_settings = papi_settings
  }

  # Step 3: BAM -> CRAM. Takes the duplication metrics from step 1 and the
  # aggregated alignment summary metrics from step 2 (as chimerism_metrics).
  call ToCram.BamToCram {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      base_file_name = sample_and_unmapped_bams.base_file_name,
      duplication_metrics = UnmappedBamToAlignedBam.duplicate_metrics,
      chimerism_metrics = AggregatedBamQC.agg_alignment_summary_metrics,
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      ref_dict = references.reference_fasta.ref_dict,
      agg_preemptible_tries = papi_settings.agg_preemptible_tries
  }

  # QC the sample WGS metrics (stringent thresholds)
  call QC.CollectWgsMetrics {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      input_bam_index = UnmappedBamToAlignedBam.output_bam_index,
      metrics_filename = sample_and_unmapped_bams.base_file_name + ".wgs_metrics",
      wgs_coverage_interval_list = wgs_coverage_interval_list,
      read_length = read_length,
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      preemptible_tries = papi_settings.agg_preemptible_tries
  }

  # QC the sample raw WGS metrics (common thresholds)
  call QC.CollectRawWgsMetrics {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      input_bam_index = UnmappedBamToAlignedBam.output_bam_index,
      metrics_filename = sample_and_unmapped_bams.base_file_name + ".raw_wgs_metrics",
      wgs_coverage_interval_list = wgs_coverage_interval_list,
      read_length = read_length,
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      preemptible_tries = papi_settings.agg_preemptible_tries
  }

  # Variant calling on the step-1 BAM; the result is exposed below as
  # output_vcf / output_vcf_index plus the gvcf_* metrics.
  call ToGvcf.VariantCalling as BamToGvcf {
    input:
      input_bam = UnmappedBamToAlignedBam.output_bam,
      contamination = UnmappedBamToAlignedBam.contamination,
      base_file_name = sample_and_unmapped_bams.base_file_name,
      final_vcf_base_name = sample_and_unmapped_bams.final_gvcf_base_name,
      calling_interval_list = references.calling_interval_list,
      evaluation_interval_list = references.evaluation_interval_list,
      haplotype_scatter_count = references.haplotype_scatter_count,
      break_bands_at_multiples_of = references.break_bands_at_multiples_of,
      ref_fasta = references.reference_fasta.ref_fasta,
      ref_fasta_index = references.reference_fasta.ref_fasta_index,
      ref_dict = references.reference_fasta.ref_dict,
      dbsnp_vcf = references.dbsnp_vcf,
      dbsnp_vcf_index = references.dbsnp_vcf_index,
      agg_preemptible_tries = papi_settings.agg_preemptible_tries,
      use_gatk3_haplotype_caller = use_gatk3_haplotype_caller
  }

  # Declarations inside a conditional are optional outside it, so these two
  # values are defined only when provide_bam_output is true.
  if (provide_bam_output) {
    File provided_output_bam = UnmappedBamToAlignedBam.output_bam
    File provided_output_bam_index = UnmappedBamToAlignedBam.output_bam_index
  }

  # Outputs that will be retained when execution is complete
  output {
    # Array-valued outputs forwarded from UnmappedBamToAlignedBam.
    Array[File] quality_yield_metrics = UnmappedBamToAlignedBam.quality_yield_metrics

    Array[File] unsorted_read_group_base_distribution_by_cycle_pdf = UnmappedBamToAlignedBam.unsorted_read_group_base_distribution_by_cycle_pdf
    Array[File] unsorted_read_group_base_distribution_by_cycle_metrics = UnmappedBamToAlignedBam.unsorted_read_group_base_distribution_by_cycle_metrics
    Array[File] unsorted_read_group_insert_size_histogram_pdf = UnmappedBamToAlignedBam.unsorted_read_group_insert_size_histogram_pdf
    Array[File] unsorted_read_group_insert_size_metrics = UnmappedBamToAlignedBam.unsorted_read_group_insert_size_metrics
    Array[File] unsorted_read_group_quality_by_cycle_pdf = UnmappedBamToAlignedBam.unsorted_read_group_quality_by_cycle_pdf
    Array[File] unsorted_read_group_quality_by_cycle_metrics = UnmappedBamToAlignedBam.unsorted_read_group_quality_by_cycle_metrics
    Array[File] unsorted_read_group_quality_distribution_pdf = UnmappedBamToAlignedBam.unsorted_read_group_quality_distribution_pdf
    Array[File] unsorted_read_group_quality_distribution_metrics = UnmappedBamToAlignedBam.unsorted_read_group_quality_distribution_metrics

    # read_group_* outputs forwarded from AggregatedBamQC.
    File read_group_alignment_summary_metrics = AggregatedBamQC.read_group_alignment_summary_metrics
    File read_group_gc_bias_detail_metrics = AggregatedBamQC.read_group_gc_bias_detail_metrics
    File read_group_gc_bias_pdf = AggregatedBamQC.read_group_gc_bias_pdf
    File read_group_gc_bias_summary_metrics = AggregatedBamQC.read_group_gc_bias_summary_metrics

    # Optional here: declared File? to match what the sub-workflow may leave undefined.
    File? cross_check_fingerprints_metrics = UnmappedBamToAlignedBam.cross_check_fingerprints_metrics

    File selfSM = UnmappedBamToAlignedBam.selfSM
    Float contamination = UnmappedBamToAlignedBam.contamination

    File calculate_read_group_checksum_md5 = AggregatedBamQC.calculate_read_group_checksum_md5

    # agg_* outputs forwarded from AggregatedBamQC.
    File agg_alignment_summary_metrics = AggregatedBamQC.agg_alignment_summary_metrics
    File agg_bait_bias_detail_metrics = AggregatedBamQC.agg_bait_bias_detail_metrics
    File agg_bait_bias_summary_metrics = AggregatedBamQC.agg_bait_bias_summary_metrics
    File agg_gc_bias_detail_metrics = AggregatedBamQC.agg_gc_bias_detail_metrics
    File agg_gc_bias_pdf = AggregatedBamQC.agg_gc_bias_pdf
    File agg_gc_bias_summary_metrics = AggregatedBamQC.agg_gc_bias_summary_metrics
    File agg_insert_size_histogram_pdf = AggregatedBamQC.agg_insert_size_histogram_pdf
    File agg_insert_size_metrics = AggregatedBamQC.agg_insert_size_metrics
    File agg_pre_adapter_detail_metrics = AggregatedBamQC.agg_pre_adapter_detail_metrics
    File agg_pre_adapter_summary_metrics = AggregatedBamQC.agg_pre_adapter_summary_metrics
    File agg_quality_distribution_pdf = AggregatedBamQC.agg_quality_distribution_pdf
    File agg_quality_distribution_metrics = AggregatedBamQC.agg_quality_distribution_metrics
    File agg_error_summary_metrics = AggregatedBamQC.agg_error_summary_metrics

    File? fingerprint_summary_metrics = AggregatedBamQC.fingerprint_summary_metrics
    File? fingerprint_detail_metrics = AggregatedBamQC.fingerprint_detail_metrics

    # Metrics files from the two WGS-metrics calls.
    File wgs_metrics = CollectWgsMetrics.metrics
    File raw_wgs_metrics = CollectRawWgsMetrics.metrics

    File duplicate_metrics = UnmappedBamToAlignedBam.duplicate_metrics
    File output_bqsr_reports = UnmappedBamToAlignedBam.output_bqsr_reports

    File gvcf_summary_metrics = BamToGvcf.vcf_summary_metrics
    File gvcf_detail_metrics = BamToGvcf.vcf_detail_metrics

    # Set only when provide_bam_output is true (see the conditional above).
    File? output_bam = provided_output_bam
    File? output_bam_index = provided_output_bam_index

    # CRAM conversion results.
    File output_cram = BamToCram.output_cram
    File output_cram_index = BamToCram.output_cram_index
    File output_cram_md5 = BamToCram.output_cram_md5

    File validate_cram_file_report = BamToCram.validate_cram_file_report

    # Variant-calling results.
    File output_vcf = BamToGvcf.output_vcf
    File output_vcf_index = BamToGvcf.output_vcf_index
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment