@bsmith89
Created February 5, 2016 19:21
Lightly redacted Makefile for a project using mothur to analyze 16S data.
# ====================
# Project Makefile
# ====================
# User Configuration {{{1
# ====================
# Major targets {{{2
.PHONY: figs res
figs:
# [Partially redacted]
res: res/rrs.procd.otus.shared
res: res/rrs.procd.otus.tax
res: seq/rrs.procd.otus.reps.afn
# [Partially redacted]
# What files are generated on `make all`?
all: docs figs res
# Compute environment {{{2
# Name, and directory, of the python virtual environment:
VENV = .venv
# Directory for recipe-internal files and terminal documents:
BUILD_DIR := build
# Documentation settings {{{2
.PHONY: docs
docs: ${BUILD_DIR}/2016-02-02_pg-meeting.pdf
BIB_FILE=doc/main.bib
MAKEFILE_GRAPH_FLAGS=-d 'raw/seq/.*' -d 'seq/split/.*' -d 'meta/split/.*' -d 'res/split.*' -d 'raw/hplc/*'
# Preqs for major documentation targets {{{3
${BUILD_DIR}/README.%:
# Cleanup settings {{{2
# Use the following line to add files and directories to be deleted on `make clean`:
CLEANUP += *.docx *.html *.log *.logfile *.pbs.o* *.pbs.e* ${BUILD_DIR}/*
TESTCLEANUP = res/test* res/split/test* ${BUILD_DIR}/test* meta/test* seq/test* seq/split/test*
.PHONY: testclean
testclean:
rm -rf ${TESTCLEANUP}
# Initialization settings {{{2
# What directories to generate on `make data-dirs`.
# By default, already includes build/ fig/
DATA_DIRS += raw/ raw/ref raw/seq \
raw/hplc raw/hplc/proto \
meta/ ref/ \
res/ res/split \
seq/ seq/split
# MOTHUR settings {{{2
# Link prerequisites into ${BUILD_DIR}/
define MOTHUR_SETUP
@ln -fsrt ${BUILD_DIR}/ $^
# START: > $@
endef
define MOTHUR_TEARDOWN
# FINISH: > $@
endef
# Ordered list of prerequisite link locations (in ${BUILD_DIR}/)
LN_PREQS = $(addprefix ${BUILD_DIR}/,$(notdir $^))
MOTHUR ?= mothur -q
# To swap in the MPI build of mothur, use:
# MOTHUR ?= mpirun -np ${MAX_PROCS} mothur-mpi
# This requires that you have built `mothur-mpi` as an MPI-capable version.
# MOTHUR_STDOUT := >/dev/null
MOTHUR_STDOUT :=
MAX_PROCS ?= $(shell nproc)
# Random seed for reproducibility.
SEED ?= 1
# Samples with fewer reads than this are dropped from the community matrix.
MIN_READS_PER_SAMPLE = 6000
NUM_PRECLUST_DIFFS = 2
OTU_CUTOFF ?= 0.03
DIST_CUTOFF ?= 0.15
CLUST_METHOD ?= average
SPLIT_TAXLEVEL ?= 4
ifeq (${CLUST_METHOD}, furthest)
CLUST_METHOD_ABBRV := fn
else
ifeq (${CLUST_METHOD}, average)
CLUST_METHOD_ABBRV := an
else
ifeq (${CLUST_METHOD}, nearest)
# mothur abbreviates nearest neighbor output as nn
CLUST_METHOD_ABBRV := nn
else
CLUST_METHOD_ABBRV := an
endif
endif
endif
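# As an illustrative sketch (not a real rule), the recipes below compose the
# mothur helpers like this; the target name, mothur command, and output file
# shown here are placeholders only:
#
#   res/%.example: seq/%.fn
#       ${MOTHUR_SETUP}
#       ${MOTHUR} "#some.command(fasta=${LN_PREQS}, processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
#       @mv ${BUILD_DIR}/$*.some-output res/$*.example
#       ${MOTHUR_TEARDOWN}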
# FINALLY: Include base makefile {{{2
include base.mk
# ==============
# Data {{{1
# ==============
# User defined recipes for cleaning up and initially parsing data.
# e.g. Slicing out columns, combining data sources, alignment, generating
# phylogenies, etc.
# 16S Library {{{2
# Reference sets {{{3
extract_from_tarball = tar -Oxzf ${1} ${2} > $@
raw/ref/Silva.nr_v119.tgz:
wget --directory-prefix=${@D} http://www.mothur.org/w/images/2/27/Silva.nr_v119.tgz
ref/silva.nr.afn: raw/ref/Silva.nr_v119.tgz
$(call extract_from_tarball,$^,silva.nr_v119.align)
ref/silva.nr.tax: raw/ref/Silva.nr_v119.tgz
$(call extract_from_tarball,$^,silva.nr_v119.tax)
# TODO: Is this the region I really want?
ref/silva.nr.pcr-v4.afn: ref/silva.nr.afn
${MOTHUR_SETUP}
${MOTHUR} "#pcr.seqs(fasta=${LN_PREQS}, start=11894, end=25319, \
keepdots=F, processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/silva.nr.pcr.afn $@
${MOTHUR_TEARDOWN}
raw/ref/Silva.gold.bacteria.zip:
wget --directory-prefix=${@D} http://www.mothur.org/w/images/f/f1/Silva.gold.bacteria.zip
ref/silva.gold.afn: raw/ref/Silva.gold.bacteria.zip
unzip $^
mv silva.gold.align $@
@touch $@
raw/ref/Silva.seed_v123.tgz:
wget --directory-prefix=${@D} http://www.mothur.org/w/images/1/15/Silva.seed_v123.tgz
ref/silva.seed.afn: raw/ref/Silva.seed_v123.tgz
$(call extract_from_tarball,$^,silva.seed_v123.align)
ref/silva.seed.tax: raw/ref/Silva.seed_v123.tgz
$(call extract_from_tarball,$^,silva.seed_v123.tax)
raw/ref/Trainset14_032015.pds.tgz:
wget --directory-prefix=${@D} http://www.mothur.org/w/images/8/88/Trainset14_032015.pds.tgz
ref/rdp.nr.afn: raw/ref/Trainset14_032015.pds.tgz
$(call extract_from_tarball,$^,trainset14_032015.pds/trainset14_032015.pds.fasta)
ref/rdp.nr.tax: raw/ref/Trainset14_032015.pds.tgz
$(call extract_from_tarball,$^,trainset14_032015.pds/trainset14_032015.pds.tax)
# Metadata {{{3
# http://www.mothur.org/wiki/MiSeq_SOP#Getting_started
meta/rrs.files meta/%.rrs.files: scripts/generate_files_file.py \
etc/rrs/library.tsv etc/rrs/analysis_group.tsv
$^ $* > $@
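# For reference, mothur's .files format (see the MiSeq SOP linked above) is one
# tab-separated line per library: group name, forward fastq, reverse fastq.
# The library and file names below are invented for illustration:
#
#   lib1    lib1_R1.fastq.gz    lib1_R2.fastq.gz
#   lib2    lib2_R1.fastq.gz    lib2_R2.fastq.gz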
# Dynamic `include`ing and generation of *.contigs.mk {{{3
# For every unique library analysis group, generate a makefile in build/ which
# declares the prerequisites for that group's contig targets.
ALL_LIBRARY_ANALYSIS_GROUPS := \
$(shell sed '1,1d' etc/rrs/analysis_group.tsv | cut -f2 | sort | uniq)
ALL_CONTIGS_MAKEFILES := \
$(patsubst %,${BUILD_DIR}/%.rrs.contigs.mk,${ALL_LIBRARY_ANALYSIS_GROUPS}) \
${BUILD_DIR}/rrs.contigs.mk
ALL_LIBRARY_SERIES += all
ifeq (${INITIALIZED},True)
$(foreach makefile,${ALL_CONTIGS_MAKEFILES},$(eval include ${makefile}))
endif
${BUILD_DIR}/%.contigs.mk: scripts/generate_contigs_makefile.py meta/%.files
$^ $* > $@
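# The generated build/%.rrs.contigs.mk is expected to hold prerequisite-only
# declarations (no recipes): one line attaching each library's gzipped read
# pair to its split contig targets, plus lines listing those split files as
# prerequisites of the pooled targets. Roughly (the library name `lib1` and
# the raw read file names are hypothetical):
#
#   seq/split/lib1.rrs.contigs.fn: raw/seq/lib1.r1.fastq.gz raw/seq/lib1.r2.fastq.gz
#   seq/rrs.contigs.fn: seq/split/lib1.rrs.contigs.fn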
raw/seq/%.fastq.gz: raw/seq/fake_root.tgz
echo "This does not make $@. If you actually run this you are doing something wrong."
# Make contigs from paired-end reads
# Preqs defined in each build/%.contigs.mk
seq/split/%.rrs.contigs.fn seq/split/%.rrs.contigs.qual res/split/%.rrs.contigs.report:
gzip -dc $(word 1,$^) > ${BUILD_DIR}/$*.r1.fastq
gzip -dc $(word 2,$^) > ${BUILD_DIR}/$*.r2.fastq
${MOTHUR} "#make.contigs(ffastq=${BUILD_DIR}/$*.r1.fastq, \
rfastq=${BUILD_DIR}/$*.r2.fastq, \
trimoverlap=T)" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.r1.trim.contigs.fasta seq/split/$*.rrs.contigs.fn
@mv ${BUILD_DIR}/$*.r1.trim.contigs.qual seq/split/$*.rrs.contigs.qual
@mv ${BUILD_DIR}/$*.r1.contigs.report res/split/$*.rrs.contigs.report
${MOTHUR_TEARDOWN}
# Concatenate individual contig files
# Preqs defined in each build/%.contigs.mk
seq/%.contigs.fn:
cat $^ > $@
# Define groups based on names of individual contig files
# Preqs defined in each build/%.contigs.mk
meta/%.contigs.groups:
$^ > $@
# Align, classify, screen and cluster seqs {{{3
# Screen 1: remove sequences which are the wrong length or have too many
# ambiguous nucleotides
# TODO: Screen params?
# TODO: file a bug report about the problem with dashes in file names for some commands.
seq/%.screen1.fn meta/%.screen1.groups: seq/%.fn meta/%.groups
${MOTHUR_SETUP}
${MOTHUR} "#screen.seqs(fasta=$(word 1,${LN_PREQS}), group=$(word 2,${LN_PREQS}), \
maxambig=0, maxlength=295, processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.good.fn seq/$*.screen1.fn
@mv ${BUILD_DIR}/$*.good.groups meta/$*.screen1.groups
${MOTHUR_TEARDOWN}
# Uniq-ify seqs {{{3
seq/%.uniq.fn meta/%.uniq.names: seq/%.fn
${MOTHUR_SETUP}
${MOTHUR} "#unique.seqs(fasta=${LN_PREQS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.unique.fn seq/$*.uniq.fn
@mv ${BUILD_DIR}/$*.names meta/$*.uniq.names
${MOTHUR_TEARDOWN}
seq/%.uniq.afn meta/%.uniq.names: seq/%.afn
${MOTHUR_SETUP}
${MOTHUR} "#unique.seqs(fasta=${LN_PREQS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.unique.afn seq/$*.uniq.afn
@mv ${BUILD_DIR}/$*.names meta/$*.uniq.names
${MOTHUR_TEARDOWN}
meta/%.contigs.screen1.uniq.count_table: meta/%.contigs.screen1.uniq.names meta/%.contigs.screen1.groups
${MOTHUR_SETUP}
${MOTHUR} "#count.seqs(name=$(word 1,${LN_PREQS}), \
group=$(word 2,${LN_PREQS}), processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.contigs.screen1.uniq.count_table meta/$*.contigs.screen1.uniq.count_table
${MOTHUR_TEARDOWN}
# Alignment {{{3
seq/%.align.afn res/%.align.report: seq/%.fn ref/silva.nr.pcr-v4.afn
${MOTHUR_SETUP}
${MOTHUR} "#align.seqs(fasta=$(word 1,${LN_PREQS}), \
reference=$(word 2,${LN_PREQS}), processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.align seq/$*.align.afn
@mv ${BUILD_DIR}/$*.align.report res/$*.align.report
${MOTHUR_TEARDOWN}
# Sequence cleanup and screening {{{3
seq/%.align.screen2.afn meta/%.align.screen2.count_table: seq/%.align.afn meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#screen.seqs(fasta=$(word 1,${LN_PREQS}), start=3100, \
count=$(word 2,${LN_PREQS}), end=10600, maxhomop=8, \
processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.align.good.afn seq/$*.align.screen2.afn
@mv ${BUILD_DIR}/$*.good.count_table meta/$*.align.screen2.count_table
${MOTHUR_TEARDOWN}
seq/%.trim.afn: seq/%.afn
${MOTHUR_SETUP}
${MOTHUR} "#filter.seqs(fasta=$(word 1,${LN_PREQS}), vertical=T, trump=., \
processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.filter.fasta seq/$*.trim.afn
${MOTHUR_TEARDOWN}
seq/%.trim.uniq.afn meta/%.trim.uniq.count_table: seq/%.trim.afn meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#unique.seqs(fasta=$(word 1,${LN_PREQS}), \
count=$(word 2,${LN_PREQS}))" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.trim.unique.afn seq/$*.trim.uniq.afn
@mv ${BUILD_DIR}/$*.trim.count_table meta/$*.trim.uniq.count_table
${MOTHUR_TEARDOWN}
# TODO: Why doesn't the .names and .groups file combination work?
# mothur complains that the .names file contains names that are not in the
# fasta, but those names are members of unique seqs, so isn't that expected?
seq/%.preclust.afn meta/%.preclust.count_table: \
seq/%.afn \
meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#pre.cluster(fasta=$(word 1,${LN_PREQS}), \
count=$(word 2,${LN_PREQS}), diffs=${NUM_PRECLUST_DIFFS}, \
processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.precluster.afn seq/$*.preclust.afn
@mv ${BUILD_DIR}/$*.precluster.count_table meta/$*.preclust.count_table
${MOTHUR_TEARDOWN}
# Chimera screening {{{3
seq/%.screen3.afn res/%.screen3.chimeras meta/%.screen3.count_table: seq/%.afn meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#chimera.uchime(fasta=$(word 1,${LN_PREQS}), \
count=$(word 2,${LN_PREQS}), \
dereplicate=T, processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.denovo.uchime.chimeras res/$*.screen3.chimeras
${MOTHUR} "#remove.seqs(fasta=${BUILD_DIR}/$*.afn, \
accnos=${BUILD_DIR}/$*.denovo.uchime.accnos)" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.pick.afn seq/$*.screen3.afn
@mv ${BUILD_DIR}/$*.denovo.uchime.pick.count_table meta/$*.screen3.count_table
${MOTHUR_TEARDOWN}
# Taxonomic classification {{{3
res/%.tax res/%.tax-summary: seq/%.afn ref/rdp.nr.afn ref/rdp.nr.tax meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#classify.seqs(fasta=$(word 1,${LN_PREQS}), \
count=$(word 4, ${LN_PREQS}), \
reference=$(word 2,${LN_PREQS}), \
taxonomy=$(word 3,${LN_PREQS}), \
cutoff=80, processors=${MAX_PROCS})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.nr.wang.taxonomy res/$*.tax
@mv ${BUILD_DIR}/$*.nr.wang.tax.summary res/$*.tax-summary
${MOTHUR_TEARDOWN}
# Taxonomic screening {{{3
# Screen out Chloroplasts, Mitochondria, Archaea, Eukaryota
seq/%.screen4.afn res/%.screen4.tax meta/%.screen4.count_table: seq/%.afn res/%.tax meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#remove.lineage(fasta=$(word 1,${LN_PREQS}), \
count=$(word 3,${LN_PREQS}), \
taxonomy=$(word 2,${LN_PREQS}), \
taxon=Chloroplast-Mitochondria-Archaea-Eukaryota-Unknown)" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.pick.afn seq/$*.screen4.afn
@mv ${BUILD_DIR}/$*.pick.tax res/$*.screen4.tax
@mv ${BUILD_DIR}/$*.pick.count_table meta/$*.screen4.count_table
${MOTHUR_TEARDOWN}
# OTU tallying {{{3
# TODO: .names dependency instead of .count_table?
# This'll increase the size of the .otus file, but it will also mean it includes
# all of the names and not just the names associated with the counts.
# Maybe useful for representative sequence pulling?
res/%.otus: seq/%.afn meta/%.count_table res/%.tax
${MOTHUR_SETUP}
${MOTHUR} "#cluster.split(fasta=$(word 1,${LN_PREQS}), \
count=$(word 2,${LN_PREQS}), taxonomy=$(word 3,${LN_PREQS}), \
splitmethod=classify, taxlevel=${SPLIT_TAXLEVEL}, cutoff=${DIST_CUTOFF}, \
processors=${MAX_PROCS}, method=${CLUST_METHOD})" ${MOTHUR_STDOUT}
@mv ${BUILD_DIR}/$*.${CLUST_METHOD_ABBRV}.unique_list.list res/$*.otus
${MOTHUR_TEARDOWN}
res/%.otus.shared: res/%.otus meta/%.count_table
${MOTHUR_SETUP}
${MOTHUR} "#make.shared(list=$(word 1,${LN_PREQS}), count=$(word 2,${LN_PREQS}), label=${OTU_CUTOFF})"
@mv ${BUILD_DIR}/$*.shared $@
${MOTHUR_TEARDOWN}
res/%.otus.tax: res/%.otus meta/%.count_table res/%.tax
${MOTHUR_SETUP}
${MOTHUR} "#classify.otu(list=$(word 1,${LN_PREQS}), count=$(word 2,${LN_PREQS}), \
taxonomy=$(word 3,${LN_PREQS}), label=${OTU_CUTOFF})"
@mv ${BUILD_DIR}/$*.${OTU_CUTOFF}.cons.taxonomy res/$*.otus.tax
${MOTHUR_TEARDOWN}
seq/%.screen3.screen4.otus.reps.afn: scripts/get_otu_reps.py res/%.screen3.screen4.otus \
meta/%.screen3.screen4.count_table seq/%.afn
$(word 1,$^) $(word 2,$^) ${OTU_CUTOFF} $(word 3,$^) $(word 4,$^) > $@
# Rename fully processed files. {{{3
res/%.procd.otus.shared: res/%.contigs.screen1.uniq.align.screen2.trim.uniq.preclust.screen3.screen4.otus.shared
cp $< $@
res/%.procd.otus.tax: res/%.contigs.screen1.uniq.align.screen2.trim.uniq.preclust.screen3.screen4.otus.tax
cp $< $@
seq/%.procd.otus.reps.afn: seq/%.contigs.screen1.uniq.align.screen2.trim.uniq.preclust.screen3.screen4.otus.reps.afn
cp $< $@
res/%.community.tsv: scripts/unstack_shared.py res/%.shared
${P1} < ${P2} > $@
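# unstack_shared.py presumably reshapes mothur's wide .shared table (one row per
# group, one column per OTU) into a long-format TSV; the column names below are
# a guess, not necessarily the script's actual output:
#
#   group     otu     tally
#   sample1   Otu001  42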
# [Partially redacted]
# =======================
# Analysis {{{1
# =======================
# User defined recipes for analyzing the data.
# e.g. Calculating means, distributions, correlations, fitting models, etc.
# Basically anything that *could* go into the paper as a table.
# [Partially redacted]
# QC {{{3
# TODO
# LEFSE {{{2
# TODO: Fix this up for the new metadata format
meta/rrs.design: scripts/generate_lefse_design.py etc/rrs/library.tsv etc/extraction.tsv etc/sample.tsv etc/mouse.tsv
$^ > $@
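# mothur's design file is a tab-delimited table whose first column holds the
# group names from the .shared file and whose remaining columns hold class
# labels. Given the lefse call below (class=treatment, subclass=site), the
# output of this script presumably looks something like this (values invented
# for illustration):
#
#   group     treatment  site
#   sample1   control    cecum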
res/%.procd.otus.lefse: res/%.procd.otus.shared meta/%.design
${MOTHUR_SETUP}
${MOTHUR} "#lefse(shared=$(word 1,${LN_PREQS}), \
design=$(word 2,${LN_PREQS}), \
class=treatment, subclass=site)"
@mv ${BUILD_DIR}/$*.procd.otus.${OTU_CUTOFF}.lefse_summary $@
${MOTHUR_TEARDOWN}
# Jupyter Notebooks {{{2
# Add pre-requisites for particular jupyter notebooks so that
# `make build/<notebook>.ipynb` will first make those pre-reqs.
# e.g.
# build/notebook.ipynb: res/results.tsv
# ==================
# Graphing {{{1
# ==================
# User defined recipes for plotting figures. These should use
# the targets of analysis recipes above as their prerequisites.
# [Partially redacted]