Created
October 20, 2017 13:30
-
-
Save rpetit3/6cf46f0f5067a126f7dae71e2a0b36c5 to your computer and use it in GitHub Desktop.
Nextflow workflow
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env nextflow

// --- Pipeline parameters (override on the command line, e.g. --sample NAME) ---
params.output = null
params.sample = null
params.fq1 = null
params.fq2 = null
params.cpu = 1
params.coverage = 100
params.is_paired = false
params.no_length_filter = false
params.reference_path = null
params.force = false
params.clear_cache_on_success = false

// Gather the input FASTQ file(s); the mate file is included only when supplied.
fastq_files = [file(params.fq1)]
if (params.fq2 != null) {
    fastq_files << file(params.fq2)
}
// Two identical channels, each buffered so a process receives all mates at once:
// 'reads' feeds the cleanup chain, 'stats' feeds the original-stats process.
reads = Channel.from(fastq_files).buffer(size: fastq_files.size())
stats = Channel.from(fastq_files).buffer(size: fastq_files.size())

// Globals referenced from the process definitions below.
is_paired = params.is_paired
sample = params.sample
reference_path = params.reference_path
stats_folder = params.output + "/analyses/fastq-stats"
logs_folder = params.output + "/logs/fastq-cleanup"
cpu = params.cpu

// Test if the final output already exists.
final_fq = params.output + "/" + sample + ".cleanup.fastq.gz"
if (params.force) {
    // Forced rerun: wipe any previous results first.
    file(stats_folder).deleteDir()
    file(logs_folder).deleteDir()
    file(final_fq).delete()
} else if (file(final_fq).exists()) {
    println "Found ${final_fq}, will not continue unless parameter 'force' is set to true."
    System.exit(0)
}

// Setup directories and make them.
file(stats_folder).mkdirs()
file(logs_folder).mkdirs()
// Original Stats | |
// Compute read statistics (via fastq-stats) on the raw input before any cleanup.
process original_stats {
    // Copy the JSON report into the fastq-stats results folder.
    publishDir stats_folder, mode: 'copy', overwrite: true
    input:
        // Buffered set of input FASTQ(s): one file single-end, two when paired.
        file fq from stats
    output:
        file {"${sample}.original.fastq.json"}
    shell:
        // Paired-end: stream both mates through a single fastq-stats pass.
        if (is_paired)
        '''
zcat !{fq[0]} !{fq[1]} | fastq-stats > !{sample}.original.fastq.json
        '''
        else
        '''
zcat !{fq} | fastq-stats > !{sample}.original.fastq.json
        '''
}
// BBduk phiX | |
// Remove PhiX control reads with BBDuk (k=31 exact-ish match, hdist=1 against
// the phiX-NC_001422 reference).
process bbduk_phix {
    // Only the BBDuk stats text file is published to the logs folder.
    publishDir logs_folder, mode: 'copy', overwrite: true, pattern: "*.txt"
    input:
        file fq from reads
    output:
        // PhiX-filtered reads feed the adapter-trimming step.
        file '*.fq' into PHIX
        file 'bbduk-phix.txt'
    shell:
        if (is_paired)
        '''
bbduk.sh -Xmx2g threads=!{cpu} in=!{fq[0]} in2=!{fq[1]} out=bbduk-phix-R1.fq \
out2=bbduk-phix-R2.fq stats=bbduk-phix.txt hdist=1 k=31 overwrite=t \
ordered=t ref=!{reference_path}/phiX-NC_001422.fasta
        '''
        else
        '''
bbduk.sh -Xmx2g threads=!{cpu} in=!{fq} out=bbduk-phix-R1.fq \
stats=bbduk-phix.txt hdist=1 k=31 overwrite=t ordered=t \
ref=!{reference_path}/phiX-NC_001422.fasta
        '''
}
// BBduk adapter | |
// Trim adapter sequence with BBDuk: right-end k-mer trimming (ktrim=r) against
// adapters.fasta, discarding reads shorter than 36 bp afterwards.
process bbduk_adapter {
    // Only the BBDuk stats text file is published to the logs folder.
    publishDir logs_folder, mode: 'copy', overwrite: true, pattern: "*.txt"
    input:
        file fq from PHIX
    output:
        // Trimmed reads feed both the adapter-stats process and SPAdes error correction.
        file '*.fq' into ADAPTER_STATS, ADAPTER
        file 'bbduk-adapter.txt'
    shell:
        if (is_paired)
        '''
bbduk.sh -Xmx2g threads=!{cpu} in=!{fq[0]} in2=!{fq[1]} out=bbduk-adapter-R1.fq \
out2=bbduk-adapter-R2.fq stats="bbduk-adapter.txt" ktrim=r k=23 mink=11 \
hdist=1 tpe tbo qout=33 ref=!{reference_path}/adapters.fasta minlength=36 \
overwrite=t ordered=t
        '''
        else
        '''
bbduk.sh -Xmx2g threads=!{cpu} in=!{fq} out=bbduk-adapter-R1.fq \
stats="bbduk-adapter.txt" ktrim=r k=23 mink=11 hdist=1 tpe tbo qout=33 \
ref=!{reference_path}/adapters.fasta minlength=36 overwrite=t \
ordered=t
        '''
}
// post adapter stats | |
// Compute read statistics after adapter trimming (input is uncompressed .fq,
// hence cat rather than zcat).
process adapter_stats {
    publishDir stats_folder, mode: 'copy', overwrite: true
    input:
        file fq from ADAPTER_STATS
    output:
        file {"${sample}.adapter.fastq.json"}
    shell:
        // Paired-end: stream both mates through a single fastq-stats pass.
        if (is_paired)
        '''
cat !{fq[0]} !{fq[1]} | fastq-stats > !{sample}.adapter.fastq.json
        '''
        else
        '''
cat !{fq} | fastq-stats > !{sample}.adapter.fastq.json
        '''
}
// SPAdes EC | |
// Run SPAdes read error correction only (no assembly); corrected reads are
// written uncompressed under corrected/.
process spades_ec {
    // Publish only the SPAdes log to the logs folder.
    publishDir logs_folder, mode: 'copy', overwrite: true, pattern: "*.log"
    input:
        file fq from ADAPTER
    output:
        // Corrected reads feed both the cleanup step and the post-ECC stats.
        file 'corrected/*.fastq' into SPADES, SPADES_STATS
        file 'spades.log'
    shell:
        if (is_paired)
        '''
spades.py -1 !{fq[0]} -2 !{fq[1]} --only-error-correction --disable-gzip-output -t !{cpu} -o ./
        '''
        else
        '''
spades.py -s !{fq} --only-error-correction --disable-gzip-output -t !{cpu} -o ./
        '''
}
// Post SPAdes stats | |
// Compute read statistics after SPAdes error correction; the JSON is also
// consumed downstream by the cleanup process (STATS channel).
process spades_stats {
    publishDir stats_folder, mode: 'copy', overwrite: true
    input:
        file fq from SPADES_STATS
    output:
        file {"${sample}.post-ecc.fastq.json"} into STATS
    shell:
        // Paired-end: stream both mates through a single fastq-stats pass.
        if (is_paired)
        '''
cat !{fq[0]} !{fq[1]} | fastq-stats > !{sample}.post-ecc.fastq.json
        '''
        else
        '''
cat !{fq} | fastq-stats > !{sample}.post-ecc.fastq.json
        '''
}
// FASTQ clean up | |
// Final cleanup: filter/downsample reads with fastq_cleanup (guided by the
// post-ECC stats JSON and the target coverage) and gzip the result.
process cleanup {
    // The final cleaned FASTQ goes straight into the top-level output folder.
    publishDir params.output, mode: 'copy', overwrite: true
    input:
        file fq from SPADES
        // NOTE: 'stats' here is the per-task staged JSON file, shadowing the
        // global 'stats' channel inside this process scope.
        file stats from STATS
    output:
        file {"${sample}.cleanup.fastq.gz"} into FINAL_FQ
    shell:
        // Optional flag: empty string when length filtering is enabled.
        no_length_filter = params.no_length_filter ? '--no_length_filter' : ''
        if (is_paired)
        '''
fastq-interleave !{fq[0]} !{fq[1]} | fastq_cleanup --paired --stats !{stats} \
--coverage !{params.coverage} !{no_length_filter} | \
gzip --best - > !{sample}.cleanup.fastq.gz
        '''
        else
        '''
cat !{fq} | fastq_cleanup --stats !{stats} --coverage !{params.coverage} \
!{no_length_filter} | gzip --best - > !{sample}.cleanup.fastq.gz
        '''
}
// Final stats | |
// Compute read statistics on the final cleaned (gzipped) FASTQ. Paired and
// single-end converge to one file by this point, so no branch is needed.
process final_stats {
    publishDir stats_folder, mode: 'copy', overwrite: true
    input:
        file fq from FINAL_FQ
    output:
        file {"${sample}.cleanup.fastq.json"}
    shell:
        '''
zcat !{fq} | fastq-stats > !{sample}.cleanup.fastq.json
        '''
}
// Invoked once after the run finishes, whether it succeeded or failed.
workflow.onComplete {
    // 'workflow.success' is already a boolean; no need to compare against true.
    if (workflow.success && params.clear_cache_on_success) {
        // No need to resume completed run so remove cache.
        file('./work/').deleteDir()
    }
    // Print a short run summary to the console.
    println """
Pipeline execution summary
---------------------------
Completed at: ${workflow.complete}
Duration : ${workflow.duration}
Success : ${workflow.success}
workDir : ${workflow.workDir}
exit status : ${workflow.exitStatus}
Error report: ${workflow.errorReport ?: '-'}
"""
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment