Skip to content

Instantly share code, notes, and snippets.

@lindenb
Last active October 6, 2020 13:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lindenb/df980f2c660f48735ca0ea7c1a816651 to your computer and use it in GitHub Desktop.
Save lindenb/df980f2c660f48735ca0ea7c1a816651 to your computer and use it in GitHub Desktop.
test nextflow dsl2.

My first workflow using nextflow DSL2.

The workflow takes as input a headerless CSV file with three comma-separated columns: sample, path-to-fq1, path-to-fq2. It then runs the following steps:

  • FastQC is invoked for each input FASTQ file

  • Trimmomatic is invoked for each pair of FASTQ files

  • FastQC is then invoked again for each trimmed FASTQ file

nextflow.preview.dsl=2

/*
 * Download a single remote file with wget.
 *
 * input : tuple (url, saveas)
 *           url    - address handed to wget
 *           saveas - name of the file written in the task directory
 * output: the downloaded file, emitted as a path
 */
process download {
    tag "download ${url}"

    input:
    // Explicit val() qualifiers: unqualified tuple elements are deprecated
    // and rejected by later Nextflow releases.
    tuple val(url), val(saveas)

    output:
    path("${saveas}")

    script:
    """
    wget -O "${saveas}" "${url}"
    """
}
nextflow.preview.dsl=2

// Optional parameters; "VOID" is the placeholder default.
// NOTE(review): neither parameter (nor the two file handles below) is used by
// the process in this file; the contaminants file comes in as a process input.
params.fastqcadapters="VOID"
params.fastqccontaminants="VOID"
fastqcadapters = file(params.fastqcadapters)
fastqccontaminants = file(params.fastqccontaminants)

/*
 * Run FastQC on one FASTQ file.
 *
 * input : contaminants - file passed to `fastqc --contaminants`
 *         tuple (prefix, sample, fq)
 *           prefix - free label, only used in the tag and forwarded downstream
 *           sample - sample name, forwarded downstream
 *           fq     - FASTQ file object (its `.name` is used in the tag)
 * output: [0] tuple (prefix, sample, <FastQC *.zip report>)
 *         [1] tuple (prefix, sample, fq), the input forwarded unchanged
 */
process fastqc {
    tag "${prefix} ${sample} ${fq.name}"

    input:
    path contaminants
    // Explicit val() qualifiers: unqualified tuple elements are deprecated
    // and rejected by later Nextflow releases. `fq` stays a value (not a
    // staged path) so the original file is forwarded as-is in output [1].
    tuple val(prefix), val(sample), val(fq)

    output:
    tuple val(prefix), val(sample), path("*.zip")
    tuple val(prefix), val(sample), val(fq)

    script:
    """
    module load fastqc
    mkdir -p TMP
    fastqc --dir TMP -o . --noextract --contaminants "${contaminants}" --quiet -f "fastq" ${fq}
    """
}
nextflow.preview.dsl=2

// Path to the headerless CSV sample sheet: sample,fq1,fq2 (required).
params.fastqs=null

include {fastqc as fastqc1} from './fastqc.nf'
include {fastqc as fastqc2} from './fastqc.nf'
include {trimmomatic} from './trimmomatic.nf'
include {download as wget1} from './download.nf'

/*
 * Main workflow:
 *   1. download the FastQC contaminant list
 *   2. run FastQC on every raw FASTQ ("before")
 *   3. re-pair the FASTQs per sample and trim each pair with trimmomatic
 *   4. run FastQC on every trimmed FASTQ ("after")
 */
workflow {
    // Fail early with a clear message instead of passing null to Channel.fromPath.
    if( !params.fastqs ) {
        error "Missing required parameter: --fastqs <CSV file: sample,fq1,fq2>"
    }

    contaminants = wget1(["https://raw.githubusercontent.com/csf-ngs/fastqc/master/Contaminants/contaminant_list.txt","contaminants"])

    // One item per FASTQ: ["before", sample, file]
    fastqIn = Channel.fromPath(params.fastqs)
        .splitCsv(header: false, sep: ',', strip: true)
        .map { T -> [T[0], file(T[1]), file(T[2])] }
        .flatMap { T -> [["before", T[0], T[1]], ["before", T[0], T[2]]] }

    // Output [1] of fastqc forwards (prefix, sample, fq); drop the prefix and
    // regroup the two mates of each sample.
    // sort:true makes the order inside each group deterministic. Without it the
    // mates arrive in task-completion order and R1/R2 may be swapped.
    // NOTE(review): this relies on the R1 path sorting before the R2 path.
    pairs = fastqc1(contaminants, fastqIn)[1]
        .map { T -> [T[1], T[2]] }
        .groupTuple(sort: true)
        .map { T -> [T[0], T[1][0], T[1][1]] }

    // One item per trimmed FASTQ: ["after", sample, file]
    trimmed = trimmomatic(pairs)
        .flatMap { T -> [["after", T[0], T[1]], ["after", T[0], T[2]]] }

    fastqc2(contaminants, trimmed)
}
# Driver Makefile: writes a small sample sheet and runs main.nf with Nextflow.
#
#   make             run the workflow (default goal: all)
#   make workdir     print the Nextflow work directory
#   make clean       remove the work directory (and the sample sheet inside it)
#   make springclean remove Nextflow's cache and the generated reports

# NOTE(review): PREFIX is not referenced by any rule below; kept for compatibility.
PREFIX := 20201006.DSL2
# Nextflow work directory; also holds the generated sample sheet.
OUTDIR := work

# These goals are commands, not files.
.PHONY: all workdir clean springclean

# Remove a half-written target (the sample sheet) if its recipe fails.
.DELETE_ON_ERROR:

# $< is main.nf, the first prerequisite.
all: main.nf $(OUTDIR)/jeter.fastqs.list
	mkdir -p "$(OUTDIR)"
	nextflow run -with-dag workflow.dot -with-report -with-timeline -with-trace -resume \
		-work-dir "$(OUTDIR)" \
		--fastqs "$(OUTDIR)/jeter.fastqs.list" \
		$<
	@echo "output is $(OUTDIR)"

# Headerless CSV sample sheet: sample,fq1,fq2
$(OUTDIR)/jeter.fastqs.list:
	mkdir -p "$(OUTDIR)"
	echo "S1,/sandbox/users/lindenbaum-p/src/jvarkit-git/src/test/resources/S1.R1.fq.gz,/sandbox/users/lindenbaum-p/src/jvarkit-git/src/test/resources/S1.R2.fq.gz" > $@
	echo "S2,/sandbox/users/lindenbaum-p/src/jvarkit-git/src/test/resources/S2.R1.fq.gz,/sandbox/users/lindenbaum-p/src/jvarkit-git/src/test/resources/S2.R2.fq.gz" >> $@
	test -s $@

workdir:
	@echo $(OUTDIR)

clean:
	rm -rf "$(OUTDIR)"

springclean:
	rm -rf ".nextflow" report.htm* timeline* trace.tx* workflow.dot*
nextflow.preview.dsl=2

/*
 * Trim one pair of FASTQ files with Trimmomatic in paired-end mode.
 *
 * input : tuple (sample, fq1, fq2)
 *           sample - sample name, used as the prefix of the output files
 *           fq1    - forward reads
 *           fq2    - reverse reads
 * output: tuple (sample, ${sample}.trimmed1.fq.gz, ${sample}.trimmed2.fq.gz)
 *
 * The adapter FASTA is written inline through a heredoc. The unpaired outputs
 * and the adapter file are deleted at the end of the script.
 * TRIMMOMATIC_JAR is a shell variable (escaped as \${...}) that must be set in
 * the task environment, presumably by `module load trimmomatic`.
 */
process trimmomatic {
    cache "lenient"
    tag "${sample}"

    input:
    // Explicit val() qualifiers: unqualified tuple elements are deprecated
    // and rejected by later Nextflow releases.
    tuple val(sample), val(fq1), val(fq2)

    output:
    tuple val(sample), path("${sample}.trimmed1.fq.gz"), path("${sample}.trimmed2.fq.gz")

    // The script body is kept at column 0 so the heredoc terminator (EOF)
    // is guaranteed to start its line.
    script:
"""
module load trimmomatic
# trim fastq using trimmomatic
cat << EOF > TruSeq2-PE.fa
>PrefixPE/1
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
>PrefixPE/2
CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
>PCR_Primer1
AATGATACGGCGACCACCGAGATCTACACTCTTTCCCTACACGACGCTCTTCCGATCT
>PCR_Primer1_rc
AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT
>PCR_Primer2
CAAGCAGAAGACGGCATACGAGATCGGTCTCGGCATTCCTGCTGAACCGCTCTTCCGATCT
>PCR_Primer2_rc
AGATCGGAAGAGCGGTTCAGCAGGAATGCCGAGACCGATCTCGTATGCCGTCTTCTGCTTG
>FlowCell1
TTTTTTTTTTAATGATACGGCGACCACCGAGATCTACAC
>FlowCell2
TTTTTTTTTTCAAGCAGAAGACGGCATACGA
EOF
java -jar \${TRIMMOMATIC_JAR} PE -phred33 \
"${fq1}" "${fq2}" \
"${sample}.trimmed1.fq.gz" "output_forward_unpaired.fq.gz" \
"${sample}.trimmed2.fq.gz" "output_reverse_unpaired.fq.gz" \
ILLUMINACLIP:TruSeq2-PE.fa:2:30:10:2:keepBothReads LEADING:3 TRAILING:3 SLIDINGWINDOW:4:15 MINLEN:36
rm -f output_forward_unpaired.fq.gz output_reverse_unpaired.fq.gz TruSeq2-PE.fa
"""
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment