robsyme/main.nf Secret

## main.nf
#!/usr/bin/env nextflow

// Pipeline parameters. The reference and read locations are derived from the
// strain name unless they are overridden explicitly.
params.strain = "Arab_me14"
params.reference = "data/${params.strain}/reference/*.fasta"
params.reads = "data/${params.strain}/illumina/pairs/*[12].fastq.gz"

// The reference parameter is a glob; the first matching file is used.
reference = file(params.reference).first()

// Build `illuminaPairs`: one [runID, [read files]] tuple per sequencing run,
// where runID is the file name with its `_R1`/`_R2` suffix removed.
// NOTE(review): `.take(4)` keeps only the first four read files (in sorted
// order) -- this looks like a testing limit; confirm before a full run.
// NOTE(review): the channel is published through `.set { illuminaPairs }`;
// `reads` itself is not referenced anywhere in the visible script.
reads = Channel
  .fromPath(params.reads)
  .toSortedList()
  .flatten()
  .take(4)
  .map { path ->
    // `def` keeps the matcher local to this closure; an undeclared name would
    // be shared with the other map closures in this script that use `matcher`.
    def matcher = (path.getFileName() =~ /(.*)_R[12]\.fastq\.gz/)
    if (!matcher.matches()) {
      // Fail with a readable message instead of the IllegalStateException
      // that group(1) raises when nothing matched.
      throw new IllegalArgumentException("Read file '${path}' does not match the expected '<runID>_R[12].fastq.gz' naming scheme")
    }
    return [matcher.group(1), path]
  }
  .groupTuple(sort: true, size: 2)
  .set { illuminaPairs }

/*
 * tophat2: builds a bowtie2 index from the reference and aligns one read pair
 * with tophat2.
 *
 * Input : the reference file, and (runID, read files) tuples from
 *         `illuminaPairs`; the reads are declared with the name pattern
 *         "reads.*.fastq.gz" and used as reads.1 / reads.2 in the script.
 * Output: (runID, tophat_out/accepted_hits.bam) tuples into `mappedPairs`.
 */
process tophat2 {
  tag { runID }
  cache 'deep'
  cpus 4

  input:
  file reference
  set val(runID), file("reads.*.fastq.gz") from illuminaPairs

  output:
  set val(runID), file("tophat_out/accepted_hits.bam") into mappedPairs

  script:
  // The sample ID is the leading "<digits>_<digits>" part of the run ID.
  // `def` keeps the matcher local to this script evaluation.
  def match = (runID =~ /(\d+_\d+).*/)
  if (!match.matches()) {
    // Explicit failure instead of the IllegalStateException from group(1).
    throw new IllegalArgumentException("Run ID '${runID}' does not start with the expected '<digits>_<digits>' sample prefix")
  }
  def sampleID = match.group(1)
  // --rg-id is quoted like the other read-group values.
  """
bowtie2-build $reference reference \
&& ln -s $reference reference.fa
tophat2 \
--num-threads ${task.cpus} \
--microexon-search \
--b2-very-fast \
--min-intron-length 5 \
--max-intron-length 200 \
--rg-id '${runID}' \
--rg-sample '${sampleID}' \
--rg-library '${sampleID}' \
--rg-description 'Illumina sequencing run ${runID} for sample ${sampleID}' \
--rg-platform ILLUMINA \
--library-type fr-firststrand \
reference \
reads.1.fastq.gz \
reads.2.fastq.gz
  """
}

// Re-key every alignment by its sample ID (the leading "<digits>_<digits>"
// part of the run ID) and collect the BAM files of each sample into
// `unmergedBams1`.
mappedPairs
  .map { runID, bamfile ->
    // `def` keeps the matcher local to this closure; an undeclared name would
    // be shared with the other map closures in this script that use `matcher`.
    def matcher = (runID =~ /(\d+_\d+).*/)
    if (!matcher.matches()) {
      // Explicit failure instead of the IllegalStateException from group(1).
      throw new IllegalArgumentException("Run ID '${runID}' does not start with the expected '<digits>_<digits>' sample prefix")
    }
    return [matcher.group(1), bamfile]
  }
  .groupTuple()
  .set { unmergedBams1 }

/*
 * merge_to_runs: combines every BAM file grouped under one sampleID into a
 * single `merged.bam` using Picard MergeSamFiles.
 *
 * Input : (sampleID, BAM files) tuples from `unmergedBams1`; the files are
 *         declared with the name pattern "hits.*.bam".
 * Output: (sampleID, merged.bam) tuples into `sampleBams`.
 */
process merge_to_runs {
  tag { sampleID }

  input:
  set val(sampleID), file("hits.*.bam") from unmergedBams1

  output:
  set val(sampleID), file("merged.bam") into sampleBams

  // The escaped \$(printf ...) is left for the shell to expand: it prints one
  // " INPUT=<file>" argument for each file matching hits.*.bam.
  // NOTE(review): ASSUME_SORTED=true is passed, so the inputs are presumably
  // already sorted -- confirm against the aligner output.
  """
java -jar $baseDir/bin/picard.jar \
  MergeSamFiles \
  OUTPUT=merged.bam \
  ASSUME_SORTED=true \
  \$(printf ' INPUT=%s' hits.*.bam)
  """
}

// Label every per-sample BAM with an experimental timepoint derived from the
// sample number (the digits before the underscore in the sample ID), then
// collect the BAM files of each timepoint into `unmergedBams2`.
sampleBams
  .map { sampleID, bamfile ->
    // `def` keeps the matcher local to this closure; an undeclared name would
    // be shared with the other map closures in this script that use `matcher`.
    def matcher = (sampleID =~ /(\d+)_\d+/)
    if (!matcher.matches()) {
      // Explicit failure instead of the IllegalStateException from group(1).
      throw new IllegalArgumentException("Sample ID '${sampleID}' does not have the expected '<digits>_<digits>' form")
    }
    Integer sampleNum = matcher.group(1).toInteger()
    String timepoint
    // Each case ends with `break`: Groovy's switch falls through like Java's,
    // so without it every recognised sample ended up labelled "culture".
    switch (sampleNum) {
      case 11:
        timepoint = "demo"
        break
      case [33, 41, 49]:
        timepoint = "early"
        break
      case [34, 42, 50]:
        timepoint = "medium"
        break
      case [35, 43, 51]:
        timepoint = "late"
        break
      case [40, 48, 56]:
        timepoint = "culture"
        break
      default:
        // Sample numbers not listed above keep the original fallback label.
        timepoint = "unknown"
    }

    return [timepoint, bamfile]
  }
  .groupTuple()
  .set { unmergedBams2 }

/*
 * merge_to_conditions: combines every BAM file grouped under one timepoint
 * into "<strain>.<timepoint>.bam" using Picard MergeSamFiles.
 *
 * Input : (timepoint, BAM files) tuples from `unmergedBams2`; the files are
 *         declared with the name pattern "hits.*.bam".
 * Output: the merged BAM file. It is declared without an `into` channel, so
 *         nothing further in this script consumes it.
 */
process merge_to_conditions {
  tag { timepoint }

  input:
  set val(timepoint), file("hits.*.bam") from unmergedBams2

  output:
  file("${params.strain}.${timepoint}.bam")

  // The escaped \$(printf ...) is left for the shell to expand: it prints one
  // " INPUT=<file>" argument for each file matching hits.*.bam.
  // NOTE(review): ASSUME_SORTED=true is passed, so the inputs are presumably
  // already sorted -- confirm against the upstream merge output.
  """
java -jar $baseDir/bin/picard.jar \
  MergeSamFiles \
  OUTPUT=${params.strain}.${timepoint}.bam \
  ASSUME_SORTED=true \
  \$(printf ' INPUT=%s' hits.*.bam)
  """
}
	#!/usr/bin/env nextflow

	// Pipeline parameters. The reference and read locations are derived from the
	// strain name unless they are overridden explicitly.
	params.strain = "Arab_me14"
	params.reference = "data/${params.strain}/reference/*.fasta"
	params.reads = "data/${params.strain}/illumina/pairs/*[12].fastq.gz"

	// The reference parameter is a glob; the first matching file is used.
	reference = file(params.reference).first()

	// Build `illuminaPairs`: one [runID, [read files]] tuple per sequencing run,
	// where runID is the file name with its `_R1`/`_R2` suffix removed.
	// NOTE(review): `.take(4)` keeps only the first four read files (in sorted
	// order) -- this looks like a testing limit; confirm before a full run.
	// NOTE(review): the channel is published through `.set { illuminaPairs }`;
	// `reads` itself is not referenced anywhere in the visible script.
	reads = Channel
	  .fromPath(params.reads)
	  .toSortedList()
	  .flatten()
	  .take(4)
	  .map { path ->
	    // `def` keeps the matcher local to this closure; an undeclared name would
	    // be shared with the other map closures in this script that use `matcher`.
	    def matcher = (path.getFileName() =~ /(.*)_R[12]\.fastq\.gz/)
	    if (!matcher.matches()) {
	      // Fail with a readable message instead of the IllegalStateException
	      // that group(1) raises when nothing matched.
	      throw new IllegalArgumentException("Read file '${path}' does not match the expected '<runID>_R[12].fastq.gz' naming scheme")
	    }
	    return [matcher.group(1), path]
	  }
	  .groupTuple(sort: true, size: 2)
	  .set { illuminaPairs }

	/*
	 * tophat2: builds a bowtie2 index from the reference and aligns one read pair
	 * with tophat2.
	 *
	 * Input : the reference file, and (runID, read files) tuples from
	 *         `illuminaPairs`; the reads are declared with the name pattern
	 *         "reads.*.fastq.gz" and used as reads.1 / reads.2 in the script.
	 * Output: (runID, tophat_out/accepted_hits.bam) tuples into `mappedPairs`.
	 */
	process tophat2 {
	  tag { runID }
	  cache 'deep'
	  cpus 4

	  input:
	  file reference
	  set val(runID), file("reads.*.fastq.gz") from illuminaPairs

	  output:
	  set val(runID), file("tophat_out/accepted_hits.bam") into mappedPairs

	  script:
	  // The sample ID is the leading "<digits>_<digits>" part of the run ID.
	  // `def` keeps the matcher local to this script evaluation.
	  def match = (runID =~ /(\d+_\d+).*/)
	  if (!match.matches()) {
	    // Explicit failure instead of the IllegalStateException from group(1).
	    throw new IllegalArgumentException("Run ID '${runID}' does not start with the expected '<digits>_<digits>' sample prefix")
	  }
	  def sampleID = match.group(1)
	  // --rg-id is quoted like the other read-group values.
	"""
	bowtie2-build $reference reference \
	&& ln -s $reference reference.fa
	tophat2 \
	--num-threads ${task.cpus} \
	--microexon-search \
	--b2-very-fast \
	--min-intron-length 5 \
	--max-intron-length 200 \
	--rg-id '${runID}' \
	--rg-sample '${sampleID}' \
	--rg-library '${sampleID}' \
	--rg-description 'Illumina sequencing run ${runID} for sample ${sampleID}' \
	--rg-platform ILLUMINA \
	--library-type fr-firststrand \
	reference \
	reads.1.fastq.gz \
	reads.2.fastq.gz
	"""
	}

	// Re-key every alignment by its sample ID (the leading "<digits>_<digits>"
	// part of the run ID) and collect the BAM files of each sample into
	// `unmergedBams1`.
	mappedPairs
	  .map { runID, bamfile ->
	    // `def` keeps the matcher local to this closure; an undeclared name would
	    // be shared with the other map closures in this script that use `matcher`.
	    def matcher = (runID =~ /(\d+_\d+).*/)
	    if (!matcher.matches()) {
	      // Explicit failure instead of the IllegalStateException from group(1).
	      throw new IllegalArgumentException("Run ID '${runID}' does not start with the expected '<digits>_<digits>' sample prefix")
	    }
	    return [matcher.group(1), bamfile]
	  }
	  .groupTuple()
	  .set { unmergedBams1 }

	/*
	 * merge_to_runs: combines every BAM file grouped under one sampleID into a
	 * single `merged.bam` using Picard MergeSamFiles.
	 *
	 * Input : (sampleID, BAM files) tuples from `unmergedBams1`; the files are
	 *         declared with the name pattern "hits.*.bam".
	 * Output: (sampleID, merged.bam) tuples into `sampleBams`.
	 */
	process merge_to_runs {
	tag { sampleID }

	input:
	set val(sampleID), file("hits.*.bam") from unmergedBams1

	output:
	set val(sampleID), file("merged.bam") into sampleBams

	// The escaped \$(printf ...) is left for the shell to expand: it prints one
	// " INPUT=<file>" argument for each file matching hits.*.bam.
	// NOTE(review): ASSUME_SORTED=true is passed, so the inputs are presumably
	// already sorted -- confirm against the aligner output.
	"""
	java -jar $baseDir/bin/picard.jar \
	MergeSamFiles \
	OUTPUT=merged.bam \
	ASSUME_SORTED=true \
	\$(printf ' INPUT=%s' hits.*.bam)
	"""
	}

	// Label every per-sample BAM with an experimental timepoint derived from the
	// sample number (the digits before the underscore in the sample ID), then
	// collect the BAM files of each timepoint into `unmergedBams2`.
	sampleBams
	  .map { sampleID, bamfile ->
	    // `def` keeps the matcher local to this closure; an undeclared name would
	    // be shared with the other map closures in this script that use `matcher`.
	    def matcher = (sampleID =~ /(\d+)_\d+/)
	    if (!matcher.matches()) {
	      // Explicit failure instead of the IllegalStateException from group(1).
	      throw new IllegalArgumentException("Sample ID '${sampleID}' does not have the expected '<digits>_<digits>' form")
	    }
	    Integer sampleNum = matcher.group(1).toInteger()
	    String timepoint
	    // Each case ends with `break`: Groovy's switch falls through like Java's,
	    // so without it every recognised sample ended up labelled "culture".
	    switch (sampleNum) {
	      case 11:
	        timepoint = "demo"
	        break
	      case [33, 41, 49]:
	        timepoint = "early"
	        break
	      case [34, 42, 50]:
	        timepoint = "medium"
	        break
	      case [35, 43, 51]:
	        timepoint = "late"
	        break
	      case [40, 48, 56]:
	        timepoint = "culture"
	        break
	      default:
	        // Sample numbers not listed above keep the original fallback label.
	        timepoint = "unknown"
	    }

	    return [timepoint, bamfile]
	  }
	  .groupTuple()
	  .set { unmergedBams2 }

	/*
	 * merge_to_conditions: combines every BAM file grouped under one timepoint
	 * into "<strain>.<timepoint>.bam" using Picard MergeSamFiles.
	 *
	 * Input : (timepoint, BAM files) tuples from `unmergedBams2`; the files are
	 *         declared with the name pattern "hits.*.bam".
	 * Output: the merged BAM file. It is declared without an `into` channel, so
	 *         nothing further in this script consumes it.
	 */
	process merge_to_conditions {
	tag { timepoint }

	input:
	set val(timepoint), file("hits.*.bam") from unmergedBams2

	output:
	file("${params.strain}.${timepoint}.bam")

	// The escaped \$(printf ...) is left for the shell to expand: it prints one
	// " INPUT=<file>" argument for each file matching hits.*.bam.
	// NOTE(review): ASSUME_SORTED=true is passed, so the inputs are presumably
	// already sorted -- confirm against the upstream merge output.
	"""
	java -jar $baseDir/bin/picard.jar \
	MergeSamFiles \
	OUTPUT=${params.strain}.${timepoint}.bam \
	ASSUME_SORTED=true \
	\$(printf ' INPUT=%s' hits.*.bam)
	"""
	}