Skip to content

Instantly share code, notes, and snippets.

@rpetit3
Created October 20, 2017 13:30
Show Gist options
  • Save rpetit3/6cf46f0f5067a126f7dae71e2a0b36c5 to your computer and use it in GitHub Desktop.
Save rpetit3/6cf46f0f5067a126f7dae71e2a0b36c5 to your computer and use it in GitHub Desktop.
Nextflow workflow
#!/usr/bin/env nextflow
// Pipeline parameter defaults.
// Base directory for all results: the final FASTQ plus the
// analyses/fastq-stats and logs/fastq-cleanup sub-folders.
params.output = null
// Sample name, used as the prefix of every output file name.
params.sample = null
// Input FASTQ file(s); they are read with zcat by the first stats step.
// fq2 is only used when it is not null.
params.fq1 = null
params.fq2 = null
// Thread count handed to bbduk.sh (threads=) and spades.py (-t).
params.cpu = 1
// Value forwarded to fastq_cleanup via --coverage.
params.coverage = 100
// Selects the paired-end variant of every command.
params.is_paired = false
// When true, --no_length_filter is passed to fastq_cleanup.
params.no_length_filter = false
// Directory that holds phiX-NC_001422.fasta and adapters.fasta.
params.reference_path = null
// When true, existing stats, logs and final FASTQ are deleted before running.
params.force = false
// When true, ./work/ is removed after a successful run.
params.clear_cache_on_success = false
// Validate required parameters before anything touches the file system.
// All of them default to null; a null value would otherwise be concatenated
// into paths that start with "null" (e.g. "null/analyses/fastq-stats") or be
// handed to file() as a null path.
def missing_params = ['output', 'sample', 'fq1', 'reference_path'].findAll { params[it] == null }
if (missing_params) {
    System.err.println "Missing required parameter(s): " + missing_params.collect { "--${it}" }.join(', ')
    System.exit(1)
}
// The paired-end commands index fq[0] and fq[1], so a second file is mandatory.
if (params.is_paired && params.fq2 == null) {
    System.err.println "Parameter 'is_paired' is set to true but 'fq2' was not provided."
    System.exit(1)
}

// Set some global variables
// Each channel emits one list holding the input FASTQ file(s): one entry for
// single-end input, two when fq2 is given. Two identical channels are created
// because the reads feed both the stats step and the cleanup chain.
reads = Channel.from(file(params.fq1)).buffer(size:1)
stats = Channel.from(file(params.fq1)).buffer(size:1)
if (params.fq2 != null) {
    reads = Channel.from(file(params.fq1), file(params.fq2)).buffer(size:2)
    stats = Channel.from(file(params.fq1), file(params.fq2)).buffer(size:2)
}
is_paired = params.is_paired
sample = params.sample
reference_path = params.reference_path
stats_folder = params.output + "/analyses/fastq-stats"
logs_folder = params.output + "/logs/fastq-cleanup"
cpu = params.cpu

// Test if output exists
final_fq = params.output + "/" + sample + ".cleanup.fastq.gz"
if (params.force) {
    // Forced re-run: wipe previous results so stale files cannot survive.
    file(stats_folder).deleteDir()
    file(logs_folder).deleteDir()
    file(final_fq).delete()
} else if (file(final_fq).exists()) {
    println "Found ${final_fq}, will not continue unless parameter 'force' is set to true."
    System.exit(0)
}

// Setup directories and make them
file(stats_folder).mkdirs()
file(logs_folder).mkdirs()
// Statistics of the untouched input reads
process original_stats {
    // The JSON report is copied into the fastq-stats folder.
    publishDir stats_folder, mode: 'copy', overwrite: true

    input:
    file fq from stats

    output:
    file {"${sample}.original.fastq.json"}

    shell:
    // Inputs are decompressed with zcat and streamed into fastq-stats;
    // paired input concatenates both mates into a single stream.
    if (!is_paired)
        '''
        zcat !{fq} | fastq-stats > !{sample}.original.fastq.json
        '''
    else
        '''
        zcat !{fq[0]} !{fq[1]} | fastq-stats > !{sample}.original.fastq.json
        '''
}
// Run bbduk.sh against the phiX reference (phiX-NC_001422.fasta)
process bbduk_phix {
    // Only the bbduk stats text file is published, into the logs folder.
    publishDir logs_folder, mode: 'copy', overwrite: true, pattern: "*.txt"

    input:
    file fq from reads

    output:
    file '*.fq' into PHIX
    file 'bbduk-phix.txt'

    shell:
    // Single-end input writes one output file, paired input writes R1 and R2.
    if (!is_paired)
        '''
        bbduk.sh -Xmx2g threads=!{cpu} in=!{fq} out=bbduk-phix-R1.fq \
        stats=bbduk-phix.txt hdist=1 k=31 overwrite=t ordered=t \
        ref=!{reference_path}/phiX-NC_001422.fasta
        '''
    else
        '''
        bbduk.sh -Xmx2g threads=!{cpu} in=!{fq[0]} in2=!{fq[1]} out=bbduk-phix-R1.fq \
        out2=bbduk-phix-R2.fq stats=bbduk-phix.txt hdist=1 k=31 overwrite=t \
        ordered=t ref=!{reference_path}/phiX-NC_001422.fasta
        '''
}
// BBduk adapter
// Runs bbduk.sh with the adapters.fasta reference on the phiX-filtered reads
// (ktrim=r, minlength=36). The resulting FASTQ file(s) feed both the
// post-adapter stats step and SPAdes error correction.
process bbduk_adapter {
    // Only the bbduk stats text file is published, into the logs folder.
    publishDir logs_folder, mode: 'copy', overwrite: true, pattern: "*.txt"

    input:
    file fq from PHIX

    output:
    file '*.fq' into ADAPTER_STATS, ADAPTER
    file 'bbduk-adapter.txt'

    shell:
    if (is_paired)
        '''
        bbduk.sh -Xmx2g threads=!{cpu} in=!{fq[0]} in2=!{fq[1]} out=bbduk-adapter-R1.fq \
        out2=bbduk-adapter-R2.fq stats="bbduk-adapter.txt" ktrim=r k=23 mink=11 \
        hdist=1 tpe tbo qout=33 ref=!{reference_path}/adapters.fasta minlength=36 \
        overwrite=t ordered=t
        '''
    else
        // Every line but the last must end with a backslash: without it
        // "ordered=t" runs as a separate shell variable assignment and is
        // never passed to bbduk.sh.
        '''
        bbduk.sh -Xmx2g threads=!{cpu} in=!{fq} out=bbduk-adapter-R1.fq \
        stats="bbduk-adapter.txt" ktrim=r k=23 mink=11 hdist=1 tpe tbo qout=33 \
        ref=!{reference_path}/adapters.fasta minlength=36 overwrite=t \
        ordered=t
        '''
}
// Statistics of the reads after the bbduk adapter step
process adapter_stats {
    // The JSON report is copied into the fastq-stats folder.
    publishDir stats_folder, mode: 'copy', overwrite: true

    input:
    file fq from ADAPTER_STATS

    output:
    file {"${sample}.adapter.fastq.json"}

    shell:
    // The bbduk output is read with plain cat (no decompression step).
    if (!is_paired)
        '''
        cat !{fq} | fastq-stats > !{sample}.adapter.fastq.json
        '''
    else
        '''
        cat !{fq[0]} !{fq[1]} | fastq-stats > !{sample}.adapter.fastq.json
        '''
}
// SPAdes run in error-correction-only mode
process spades_ec {
    // Only the SPAdes log is published, into the logs folder.
    publishDir logs_folder, mode: 'copy', overwrite: true, pattern: "*.log"

    input:
    file fq from ADAPTER

    output:
    // NOTE(review): in paired mode SPAdes may also write an unpaired-reads
    // file under corrected/, which this glob would pick up as well - confirm
    // that fq[0]/fq[1] downstream still resolve to R1/R2.
    file 'corrected/*.fastq' into SPADES, SPADES_STATS
    file 'spades.log'

    shell:
    // SPAdes writes into the task directory itself (-o ./); its output is
    // left uncompressed (--disable-gzip-output).
    if (!is_paired)
        '''
        spades.py -s !{fq} --only-error-correction --disable-gzip-output -t !{cpu} -o ./
        '''
    else
        '''
        spades.py -1 !{fq[0]} -2 !{fq[1]} --only-error-correction --disable-gzip-output -t !{cpu} -o ./
        '''
}
// Statistics of the reads after SPAdes error correction
process spades_stats {
    // The JSON report is copied into the fastq-stats folder; it is also
    // emitted on STATS because the cleanup step takes it via --stats.
    publishDir stats_folder, mode: 'copy', overwrite: true

    input:
    file fq from SPADES_STATS

    output:
    file {"${sample}.post-ecc.fastq.json"} into STATS

    shell:
    if (!is_paired)
        '''
        cat !{fq} | fastq-stats > !{sample}.post-ecc.fastq.json
        '''
    else
        '''
        cat !{fq[0]} !{fq[1]} | fastq-stats > !{sample}.post-ecc.fastq.json
        '''
}
// Final FASTQ clean up: run fastq_cleanup and gzip the result
process cleanup {
    // The cleaned FASTQ is copied directly into the output directory.
    publishDir params.output, mode: 'copy', overwrite: true

    input:
    file fq from SPADES
    file stats from STATS

    output:
    file {"${sample}.cleanup.fastq.gz"} into FINAL_FQ

    shell:
    // Optional flag for fastq_cleanup; an empty string when the filter stays on.
    // Deliberately assigned without 'def', exactly as in the original script.
    no_length_filter = params.no_length_filter ? '--no_length_filter' : ''
    // Paired reads are interleaved into one stream before fastq_cleanup;
    // both variants produce a single gzip-compressed FASTQ.
    if (!is_paired)
        '''
        cat !{fq} | fastq_cleanup --stats !{stats} --coverage !{params.coverage} \
        !{no_length_filter} | gzip --best - > !{sample}.cleanup.fastq.gz
        '''
    else
        '''
        fastq-interleave !{fq[0]} !{fq[1]} | fastq_cleanup --paired --stats !{stats} \
        --coverage !{params.coverage} !{no_length_filter} | \
        gzip --best - > !{sample}.cleanup.fastq.gz
        '''
}
// Statistics of the final, cleaned-up reads
process final_stats {
    // The JSON report is copied into the fastq-stats folder.
    publishDir stats_folder, mode: 'copy', overwrite: true

    input:
    file fq from FINAL_FQ

    output:
    file {"${sample}.cleanup.fastq.json"}

    shell:
    // The cleaned FASTQ is gzip-compressed, so it is read with zcat; it is a
    // single file whether or not the input was paired.
    '''
    zcat !{fq} | fastq-stats > !{sample}.cleanup.fastq.json
    '''
}
// Completion hook: optionally drop the work cache, then print a run summary.
workflow.onComplete {
    def drop_cache = workflow.success == true && params.clear_cache_on_success
    if (drop_cache) {
        // A finished run never needs to be resumed, so the cache can go.
        // NOTE(review): the path is hard-coded to ./work/ while the summary
        // below reports workflow.workDir - confirm whether a custom work
        // directory should be removed instead.
        file('./work/').deleteDir()
    }

    // The leading and trailing empty entries reproduce the blank line before
    // and the newline after the report text.
    def summary = [
        '',
        'Pipeline execution summary',
        '---------------------------',
        "Completed at: ${workflow.complete}",
        "Duration : ${workflow.duration}",
        "Success : ${workflow.success}",
        "workDir : ${workflow.workDir}",
        "exit status : ${workflow.exitStatus}",
        "Error report: ${workflow.errorReport ?: '-'}",
        ''
    ].join('\n')
    println summary
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment