#!/usr/bin/env nextflow
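
// Align Illumina read pairs against the strain reference with TopHat2, merge
// the resulting BAMs per sample with Picard MergeSamFiles, then merge again
// per timepoint.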
// Pipeline parameters; any of these can be overridden on the command line
params.strain = "Arab_me14"
params.reference = "data/${params.strain}/reference/*.fasta"
params.reads = "data/${params.strain}/illumina/pairs/*[12].fastq.gz"

// file() with a glob returns a list of matches; use the first FASTA as the reference
reference = file(params.reference).first()
// Build a channel of [runID, [R1, R2]] tuples. Read files are expected to be
// named <runID>_R1.fastq.gz / <runID>_R2.fastq.gz; take(4) keeps only the
// first four files (i.e. the first two sorted pairs).
Channel
    .fromPath(params.reads)
    .toSortedList()
    .flatten()
    .take(4)
    .map { path ->
        def matcher = (path.getFileName() =~ /(.*)_R[12].fastq.gz/)
        assert matcher.matches()
        return [matcher.group(1), path]
    }
    .groupTuple(sort: true, size: 2)
    .set { illuminaPairs }
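
// Hypothetical example (assuming files named like 33_1_R1.fastq.gz and
// 33_1_R2.fastq.gz): illuminaPairs would emit the tuple
//   ['33_1', [.../33_1_R1.fastq.gz, .../33_1_R2.fastq.gz]]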
// Align each read pair against the reference with TopHat2
process tophat2 {
    tag { runID }
    cache 'deep'
    cpus 4

    input:
    file reference
    set val(runID), file("reads.*.fastq.gz") from illuminaPairs

    output:
    set val(runID), file("tophat_out/accepted_hits.bam") into mappedPairs

    script:
    def match = (runID =~ /(\d+_\d+).*/)
    assert match.matches()
    def sampleID = match.group(1)
    """
    # build the bowtie2 index and link the FASTA so reference.fa sits next to it
    bowtie2-build $reference reference \
      && ln -s $reference reference.fa
    tophat2 \
      --num-threads ${task.cpus} \
      --microexon-search \
      --b2-very-fast \
      --min-intron-length 5 \
      --max-intron-length 200 \
      --rg-id ${runID} \
      --rg-sample '${sampleID}' \
      --rg-library '${sampleID}' \
      --rg-description 'Illumina sequencing run ${runID} for sample ${sampleID}' \
      --rg-platform ILLUMINA \
      --library-type fr-firststrand \
      reference \
      reads.1.fastq.gz \
      reads.2.fastq.gz
    """
}
// Regroup the per-run BAMs by their leading <digits>_<digits> identifier
mappedPairs
    .map { runID, bamfile ->
        def matcher = (runID =~ /(\d+_\d+).*/)
        assert matcher.matches()
        return [matcher.group(1), bamfile]
    }
    .groupTuple()
    .set { unmergedBams1 }
process merge_to_runs {
    tag { sampleID }

    input:
    set val(sampleID), file("hits.*.bam") from unmergedBams1

    output:
    set val(sampleID), file("merged.bam") into sampleBams

    script:
    """
    java -jar $baseDir/bin/picard.jar \
      MergeSamFiles \
      OUTPUT=merged.bam \
      ASSUME_SORTED=true \
      \$(printf ' INPUT=%s' hits.*.bam)
    """
}
// Translate each sample number to a timepoint label, then group BAMs by timepoint
sampleBams
    .map { sampleID, bamfile ->
        def matcher = (sampleID =~ /(\d+)_\d+/)
        assert matcher.matches()
        Integer sampleNum = matcher.group(1).toInteger()
        String timepoint = "unknown"
        switch (sampleNum) {
            case 11:           timepoint = "demo";    break
            case [33, 41, 49]: timepoint = "early";   break
            case [34, 42, 50]: timepoint = "medium";  break
            case [35, 43, 51]: timepoint = "late";    break
            case [40, 48, 56]: timepoint = "culture"; break
        }
        return [timepoint, bamfile]
    }
    .groupTuple()
    .set { unmergedBams2 }
process merge_to_conditions {
    tag { timepoint }

    input:
    set val(timepoint), file("hits.*.bam") from unmergedBams2

    output:
    file("${params.strain}.${timepoint}.bam")

    script:
    """
    java -jar $baseDir/bin/picard.jar \
      MergeSamFiles \
      OUTPUT=${params.strain}.${timepoint}.bam \
      ASSUME_SORTED=true \
      \$(printf ' INPUT=%s' hits.*.bam)
    """
}
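
// A minimal sketch of how this script might be launched (hypothetical data
// layout; adjust --strain, or override --reference/--reads, to match yours):
//
//   nextflow run main.nf --strain Arab_me14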