# gregcaporaso/empress-replay.bash

## empress-replay.bash
#!/usr/bin/env bash
###############################################################################
# Auto-generated by provenance_lib v.0.2.0 at 09:49:20 AM on 24 Jan, 2023

# This document is a representation of the scholarly work of the creator of the
# QIIME 2 Results provided as input to provenance_lib, and may be protected by
# intellectual property law. Please respect all copyright restrictions and
# licenses governing the use, modification, and redistribution of this work.

# For User Support, post to the Community Plugin Support channel of the QIIME 2
# Forum: https://forum.qiime2.org
# Documentation/issues: https://github.com/qiime2/provenance_lib

# UUIDs of all target QIIME 2 Results are shown at the end of the file

# Instructions for use:
# 1. Open this script in a text editor or IDE. Support for BASH
#    syntax highlighting can be helpful.
# 2. Search or scan visually for '<' or '>' characters to find places where
#    user input (e.g. a filepath or column name) is required. These must be
#    replaced with your own values. E.g. <column name> -> 'patient_id'.
#    Failure to remove '<' or '>' may result in `No such File ...` errors
# 3. Search for 'FIXME' comments in the script, and respond as directed.
# 4. Remove all 'FIXME' comments from the script completely. Failure to do so
#    may result in 'Missing Option' errors
# 5. Adjust the arguments to the commands below to suit your data and metadata.
#    If your data is not identical to that in the replayed analysis,
#    changes may be required. (e.g. sample ids or rarefaction depth)
# 6. Optional: replace any filenames in this script that begin with 'XX' with
#    unique file names to ensure they are preserved. QIIME 2 saves all outputs
#    from all actions in this script to disk regardless of whether those
#    outputs were in the original collection of replayed results. The filenames
#    of "un-replayed" artifacts are prefixed with 'XX' so they may be easily
#    located. These names are not guaranteed to be unique, so 'XX_table.qza'
#    may be overwritten by another 'XX_table.qza' later in the script.
# 7. Activate your replay conda environment, and confirm you have installed all
#    plugins used by the script.
# 8. Run this script with `bash <path to this script>`, or copy-paste commands
#    into the terminal for a more interactive analysis.
# 9. Optional: to delete all results not required to produce the figures and
#    data used to generate this script, navigate to the directory in which you
#    ran the script and `rm XX*.qz*`
###############################################################################

# Shell options for the whole replay:
#   errexit - stop at the first command that exits non-zero
#   xtrace  - echo every command before it runs so progress can be followed
#             (bash writes this trace to stderr by default)
set -o errexit -o xtrace

# Import a 'FeatureData[Taxonomy]' artifact; the result
# (feature-data-taxonomy-0.qza) is the reference taxonomy passed to
# fit-classifier-naive-bayes below.
# '<your data here>' is a placeholder: replace it, angle brackets included,
# with the path to your own data. Left unedited, bash parses the '<' and '>'
# as redirections instead of handing a path to qiime.
qiime tools import \
  --type 'FeatureData[Taxonomy]' \
  --input-path <your data here> \
  --output-path feature-data-taxonomy-0.qza

# Import a 'FeatureData[Sequence]' artifact; the result
# (feature-data-sequence-0.qza) supplies the reference reads passed to
# fit-classifier-naive-bayes below.
# '<your data here>' is a placeholder: replace it, angle brackets included,
# with the path to your own data. Left unedited, bash parses the '<' and '>'
# as redirections instead of handing a path to qiime.
qiime tools import \
  --type 'FeatureData[Sequence]' \
  --input-path <your data here> \
  --output-path feature-data-sequence-0.qza

# Import a 'SampleData[SequencesWithQuality]' artifact; the result
# (sample-data-sequences-with-quality-0.qza) is the input to
# `qiime cutadapt trim-single` below.
# '<your data here>' is a placeholder: replace it, angle brackets included,
# with the path to your own data. Left unedited, bash parses the '<' and '>'
# as redirections instead of handing a path to qiime.
qiime tools import \
  --type 'SampleData[SequencesWithQuality]' \
  --input-path <your data here> \
  --output-path sample-data-sequences-with-quality-0.qza

# Fit the naive Bayes classifier from the imported reference reads and
# reference taxonomy, saving it as classifier-0.qza.
# The command line is assembled as an array and expanded with "${...[@]}",
# so qiime receives exactly one argument per array element.
fit_classifier_cmd=(
  qiime feature-classifier fit-classifier-naive-bayes
  # reference inputs
  --i-reference-reads feature-data-sequence-0.qza
  --i-reference-taxonomy feature-data-taxonomy-0.qza
  # classify-- parameters
  --p-classify--alpha 0.001
  --p-classify--chunk-size 20000
  --p-classify--class-prior null
  --p-no-classify--fit-prior
  # feat-ext-- parameters
  --p-no-feat-ext--alternate-sign
  --p-feat-ext--analyzer char_wb
  --p-no-feat-ext--binary
  --p-feat-ext--decode-error strict
  --p-feat-ext--encoding utf-8
  --p-feat-ext--input content
  --p-feat-ext--lowercase
  --p-feat-ext--n-features 8192
  --p-feat-ext--ngram-range '[7, 7]'
  --p-feat-ext--norm l2
  --p-feat-ext--preprocessor null
  --p-feat-ext--stop-words null
  --p-feat-ext--strip-accents null
  --p-feat-ext--token-pattern '(?u)\b\w\w+\b'
  --p-feat-ext--tokenizer null
  --p-no-verbose
  # output
  --o-classifier classifier-0.qza
)
"${fit_classifier_cmd[@]}"

# Trim the imported reads with `qiime cutadapt trim-single` using the adapter
# given by --p-adapter; reads are written to trimmed-sequences-0.qza.
# The argv is built as an array so each element reaches qiime as one word.
trim_single_cmd=(
  qiime cutadapt trim-single
  # input
  --i-demultiplexed-sequences sample-data-sequences-with-quality-0.qza
  # parameters
  --p-cores 15
  --p-adapter CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG
  --p-error-rate 0.1
  --p-indels
  --p-times 1
  --p-overlap 3
  --p-no-match-read-wildcards
  --p-match-adapter-wildcards
  --p-minimum-length 1
  --p-discard-untrimmed
  # output
  --o-trimmed-sequences trimmed-sequences-0.qza
)
"${trim_single_cmd[@]}"

# Denoise the trimmed reads with `qiime dada2 denoise-pyro`. The
# representative sequences and the feature table feed the filtering steps
# that follow; the denoising stats get an 'XX'-prefixed name (see item 6 of
# the header instructions).
# The argv is built as an array so each element reaches qiime as one word.
denoise_pyro_cmd=(
  qiime dada2 denoise-pyro
  # input
  --i-demultiplexed-seqs trimmed-sequences-0.qza
  # parameters
  --p-trunc-len 150
  --p-trim-left 0
  --p-max-ee 2.0
  --p-trunc-q 2
  --p-max-len 0
  --p-pooling-method independent
  --p-chimera-method consensus
  --p-min-fold-parent-over-abundance 1.0
  --p-no-allow-one-off
  --p-n-threads 0
  --p-n-reads-learn 250000
  --p-hashed-feature-ids
  # outputs
  --o-representative-sequences representative-sequences-0.qza
  --o-table table-0.qza
  --o-denoising-stats XX_denoising_stats
)
"${denoise_pyro_cmd[@]}"

# Filter table-0.qza with `qiime feature-table filter-features`
# (--p-min-samples 2, --p-min-frequency 0), writing filtered-table-0.qza.
# The argv is built as an array so each element reaches qiime as one word.
filter_features_cmd=(
  qiime feature-table filter-features
  --i-table table-0.qza
  --p-min-frequency 0
  --p-min-samples 2
  --p-no-exclude-ids
  --p-filter-empty-samples
  --o-filtered-table filtered-table-0.qza
)
"${filter_features_cmd[@]}"

# Filter the representative sequences against filtered-table-0.qza with
# `qiime feature-table filter-seqs`, writing filtered-data-0.qza.
# The argv is built as an array so each element reaches qiime as one word.
filter_seqs_cmd=(
  qiime feature-table filter-seqs
  --i-data representative-sequences-0.qza
  --i-table filtered-table-0.qza
  --p-no-exclude-ids
  --o-filtered-data filtered-data-0.qza
)
"${filter_seqs_cmd[@]}"

# Run `qiime phylogeny align-to-tree-mafft-fasttree` on the filtered
# sequences. Only rooted-tree-0.qza is consumed later in this script; the
# alignment, masked alignment and tree outputs get 'XX'-prefixed names.
# The argv is built as an array so each element reaches qiime as one word.
align_to_tree_cmd=(
  qiime phylogeny align-to-tree-mafft-fasttree
  # input
  --i-sequences filtered-data-0.qza
  # parameters
  --p-n-threads 16
  --p-mask-max-gap-frequency 1.0
  --p-mask-min-conservation 0.4
  --p-no-parttree
  # outputs
  --o-rooted-tree rooted-tree-0.qza
  --o-alignment XX_alignment
  --o-masked-alignment XX_masked_alignment
  --o-tree XX_tree
)
"${align_to_tree_cmd[@]}"

# Classify the filtered sequences with classifier-0.qza via
# `qiime feature-classifier classify-sklearn`, writing classification-0.qza.
# The argv is built as an array so each element reaches qiime as one word.
classify_sklearn_cmd=(
  qiime feature-classifier classify-sklearn
  # inputs
  --i-reads filtered-data-0.qza
  --i-classifier classifier-0.qza
  # parameters
  --p-reads-per-batch auto
  --p-n-jobs 8
  --p-pre-dispatch '2*n_jobs'
  --p-confidence 0.7
  --p-read-orientation auto
  # output
  --o-classification classification-0.qza
)
"${classify_sklearn_cmd[@]}"

# Replay attempts to represent metadata inputs accurately, but metadata .tsv
# files are merged automatically by some interfaces, rendering distinctions
# between file inputs invisible in provenance. We output the recorded
# metadata to disk to enable visual inspection.

# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the
# original metadata, saved at
# './recorded_metadata/diversity_core_metrics_phylogenetic_0/'

# Run `qiime diversity core-metrics-phylogenetic` on the filtered table and
# the rooted tree at a sampling depth of 4000.
# '<your metadata filepath>' is a placeholder: replace it, angle brackets
# included, with your metadata file. Left unedited, bash parses the '<' and
# '>' as redirections instead of handing a path to qiime.
# Only rarefied-table-0.qza and unweighted-unifrac-pcoa-results-0.qza are
# consumed later in this script; every other output gets an 'XX'-prefixed
# name (see item 6 of the header instructions).
qiime diversity core-metrics-phylogenetic \
  --i-table filtered-table-0.qza \
  --i-phylogeny rooted-tree-0.qza \
  --p-sampling-depth 4000 \
  --m-metadata-file <your metadata filepath> \
  --p-no-with-replacement \
  --p-n-jobs-or-threads 16 \
  --o-rarefied-table rarefied-table-0.qza \
  --o-unweighted-unifrac-pcoa-results unweighted-unifrac-pcoa-results-0.qza \
  --o-faith-pd-vector XX_faith_pd_vector \
  --o-observed-features-vector XX_observed_features_vector \
  --o-shannon-vector XX_shannon_vector \
  --o-evenness-vector XX_evenness_vector \
  --o-unweighted-unifrac-distance-matrix XX_unweighted_unifrac_distance_matrix \
  --o-weighted-unifrac-distance-matrix XX_weighted_unifrac_distance_matrix \
  --o-jaccard-distance-matrix XX_jaccard_distance_matrix \
  --o-bray-curtis-distance-matrix XX_bray_curtis_distance_matrix \
  --o-weighted-unifrac-pcoa-results XX_weighted_unifrac_pcoa_results \
  --o-jaccard-pcoa-results XX_jaccard_pcoa_results \
  --o-bray-curtis-pcoa-results XX_bray_curtis_pcoa_results \
  --o-unweighted-unifrac-emperor XX_unweighted_unifrac_emperor \
  --o-weighted-unifrac-emperor XX_weighted_unifrac_emperor \
  --o-jaccard-emperor XX_jaccard_emperor \
  --o-bray-curtis-emperor XX_bray_curtis_emperor

# Convert the rarefied table with `qiime feature-table relative-frequency`,
# writing relative-frequency-table-0.qza.
# The argv is built as an array so each element reaches qiime as one word.
relative_frequency_cmd=(
  qiime feature-table relative-frequency
  --i-table rarefied-table-0.qza
  --o-relative-frequency-table relative-frequency-table-0.qza
)
"${relative_frequency_cmd[@]}"

# Combine the unweighted UniFrac PCoA results with the relative-frequency
# table via `qiime diversity pcoa-biplot`, writing biplot-0.qza.
# The argv is built as an array so each element reaches qiime as one word.
pcoa_biplot_cmd=(
  qiime diversity pcoa-biplot
  --i-pcoa unweighted-unifrac-pcoa-results-0.qza
  --i-features relative-frequency-table-0.qza
  --o-biplot biplot-0.qza
)
"${pcoa_biplot_cmd[@]}"

# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the
# original metadata, saved at
# './recorded_metadata/empress_community_plot_0/'

# Run `qiime empress community-plot` on the rooted tree, the
# relative-frequency table and the biplot, with classification-0.qza supplied
# as feature metadata; the result is written to visualization-0.qzv.
# '<your metadata filepath>' is a placeholder: replace it, angle brackets
# included, with your sample metadata file. Left unedited, bash parses the
# '<' and '>' as redirections instead of handing a path to qiime.

qiime empress community-plot \
  --i-tree rooted-tree-0.qza \
  --i-feature-table relative-frequency-table-0.qza \
  --i-pcoa biplot-0.qza \
  --m-sample-metadata-file <your metadata filepath> \
  --m-feature-metadata-file classification-0.qza \
  --p-no-ignore-missing-samples \
  --p-no-filter-extra-samples \
  --p-no-filter-missing-features \
  --p-number-of-features 5 \
  --p-shear-to-table \
  --o-visualization visualization-0.qzv


###############################################################################
# The following QIIME 2 Results were parsed to produce this script:
# a1a46509-66fc-4719-b72b-eaa23443bed4
###############################################################################
	#!/usr/bin/env bash
	###############################################################################
	# Auto-generated by provenance_lib v.0.2.0 at 09:49:20 AM on 24 Jan, 2023

	# This document is a representation of the scholarly work of the creator of the
	# QIIME 2 Results provided as input to provenance_lib, and may be protected by
	# intellectual property law. Please respect all copyright restrictions and
	# licenses governing the use, modification, and redistribution of this work.

	# For User Support, post to the Community Plugin Support channel of the QIIME 2
	# Forum: https://forum.qiime2.org
	# Documentation/issues: https://github.com/qiime2/provenance_lib

	# UUIDs of all target QIIME 2 Results are shown at the end of the file

	# Instructions for use:
	# 1. Open this script in a text editor or IDE. Support for BASH
	# syntax highlighting can be helpful.
	# 2. Search or scan visually for '<' or '>' characters to find places where
	# user input (e.g. a filepath or column name) is required. These must be
	# replaced with your own values. E.g. <column name> -> 'patient_id'.
	# Failure to remove '<' or '>' may result in `No such File ...` errors
	# 3. Search for 'FIXME' comments in the script, and respond as directed.
	# 4. Remove all 'FIXME' comments from the script completely. Failure to do so
	# may result in 'Missing Option' errors
	# 5. Adjust the arguments to the commands below to suit your data and metadata.
	# If your data is not identical to that in the replayed analysis,
	# changes may be required. (e.g. sample ids or rarefaction depth)
	# 6. Optional: replace any filenames in this script that begin with 'XX' with
	# unique file names to ensure they are preserved. QIIME 2 saves all outputs
	# from all actions in this script to disk regardless of whether those
	# outputs were in the original collection of replayed results. The filenames
	# of "un-replayed" artifacts are prefixed with 'XX' so they may be easily
	# located. These names are not guaranteed to be unique, so 'XX_table.qza'
	# may be overwritten by another 'XX_table.qza' later in the script.
	# 7. Activate your replay conda environment, and confirm you have installed all
	# plugins used by the script.
	# 8. Run this script with `bash <path to this script>`, or copy-paste commands
	# into the terminal for a more interactive analysis.
	# 9. Optional: to delete all results not required to produce the figures and
	# data used to generate this script, navigate to the directory in which you
	# ran the script and `rm XX*.qz*`
	###############################################################################

	# Shell options for the whole replay:
	#   errexit - stop at the first command that exits non-zero
	#   xtrace  - echo every command before it runs so progress can be followed
	#             (bash writes this trace to stderr by default)
	set -o errexit -o xtrace

	# Import a 'FeatureData[Taxonomy]' artifact; the result
	# (feature-data-taxonomy-0.qza) is the reference taxonomy passed to
	# fit-classifier-naive-bayes below.
	# '<your data here>' is a placeholder: replace it, angle brackets included,
	# with the path to your own data. Left unedited, bash parses the '<' and '>'
	# as redirections instead of handing a path to qiime.
	qiime tools import \
	--type 'FeatureData[Taxonomy]' \
	--input-path <your data here> \
	--output-path feature-data-taxonomy-0.qza

	# Import a 'FeatureData[Sequence]' artifact; the result
	# (feature-data-sequence-0.qza) supplies the reference reads passed to
	# fit-classifier-naive-bayes below.
	# '<your data here>' is a placeholder: replace it, angle brackets included,
	# with the path to your own data. Left unedited, bash parses the '<' and '>'
	# as redirections instead of handing a path to qiime.
	qiime tools import \
	--type 'FeatureData[Sequence]' \
	--input-path <your data here> \
	--output-path feature-data-sequence-0.qza

	# Import a 'SampleData[SequencesWithQuality]' artifact; the result
	# (sample-data-sequences-with-quality-0.qza) is the input to
	# `qiime cutadapt trim-single` below.
	# '<your data here>' is a placeholder: replace it, angle brackets included,
	# with the path to your own data. Left unedited, bash parses the '<' and '>'
	# as redirections instead of handing a path to qiime.
	qiime tools import \
	--type 'SampleData[SequencesWithQuality]' \
	--input-path <your data here> \
	--output-path sample-data-sequences-with-quality-0.qza

	# Fit the naive Bayes classifier from the imported reference reads and
	# reference taxonomy, saving it as classifier-0.qza.
	# The command line is assembled as an array and expanded with "${...[@]}",
	# so qiime receives exactly one argument per array element.
	fit_classifier_cmd=(
		qiime feature-classifier fit-classifier-naive-bayes
		# reference inputs
		--i-reference-reads feature-data-sequence-0.qza
		--i-reference-taxonomy feature-data-taxonomy-0.qza
		# classify-- parameters
		--p-classify--alpha 0.001
		--p-classify--chunk-size 20000
		--p-classify--class-prior null
		--p-no-classify--fit-prior
		# feat-ext-- parameters
		--p-no-feat-ext--alternate-sign
		--p-feat-ext--analyzer char_wb
		--p-no-feat-ext--binary
		--p-feat-ext--decode-error strict
		--p-feat-ext--encoding utf-8
		--p-feat-ext--input content
		--p-feat-ext--lowercase
		--p-feat-ext--n-features 8192
		--p-feat-ext--ngram-range '[7, 7]'
		--p-feat-ext--norm l2
		--p-feat-ext--preprocessor null
		--p-feat-ext--stop-words null
		--p-feat-ext--strip-accents null
		--p-feat-ext--token-pattern '(?u)\b\w\w+\b'
		--p-feat-ext--tokenizer null
		--p-no-verbose
		# output
		--o-classifier classifier-0.qza
	)
	"${fit_classifier_cmd[@]}"

	# Trim the imported reads with `qiime cutadapt trim-single` using the adapter
	# given by --p-adapter; reads are written to trimmed-sequences-0.qza.
	# The argv is built as an array so each element reaches qiime as one word.
	trim_single_cmd=(
		qiime cutadapt trim-single
		# input
		--i-demultiplexed-sequences sample-data-sequences-with-quality-0.qza
		# parameters
		--p-cores 15
		--p-adapter CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG
		--p-error-rate 0.1
		--p-indels
		--p-times 1
		--p-overlap 3
		--p-no-match-read-wildcards
		--p-match-adapter-wildcards
		--p-minimum-length 1
		--p-discard-untrimmed
		# output
		--o-trimmed-sequences trimmed-sequences-0.qza
	)
	"${trim_single_cmd[@]}"

	# Denoise the trimmed reads with `qiime dada2 denoise-pyro`. The
	# representative sequences and the feature table feed the filtering steps
	# that follow; the denoising stats get an 'XX'-prefixed name (see item 6 of
	# the header instructions).
	# The argv is built as an array so each element reaches qiime as one word.
	denoise_pyro_cmd=(
		qiime dada2 denoise-pyro
		# input
		--i-demultiplexed-seqs trimmed-sequences-0.qza
		# parameters
		--p-trunc-len 150
		--p-trim-left 0
		--p-max-ee 2.0
		--p-trunc-q 2
		--p-max-len 0
		--p-pooling-method independent
		--p-chimera-method consensus
		--p-min-fold-parent-over-abundance 1.0
		--p-no-allow-one-off
		--p-n-threads 0
		--p-n-reads-learn 250000
		--p-hashed-feature-ids
		# outputs
		--o-representative-sequences representative-sequences-0.qza
		--o-table table-0.qza
		--o-denoising-stats XX_denoising_stats
	)
	"${denoise_pyro_cmd[@]}"

	# Filter table-0.qza with `qiime feature-table filter-features`
	# (--p-min-samples 2, --p-min-frequency 0), writing filtered-table-0.qza.
	# The argv is built as an array so each element reaches qiime as one word.
	filter_features_cmd=(
		qiime feature-table filter-features
		--i-table table-0.qza
		--p-min-frequency 0
		--p-min-samples 2
		--p-no-exclude-ids
		--p-filter-empty-samples
		--o-filtered-table filtered-table-0.qza
	)
	"${filter_features_cmd[@]}"

	# Filter the representative sequences against filtered-table-0.qza with
	# `qiime feature-table filter-seqs`, writing filtered-data-0.qza.
	# The argv is built as an array so each element reaches qiime as one word.
	filter_seqs_cmd=(
		qiime feature-table filter-seqs
		--i-data representative-sequences-0.qza
		--i-table filtered-table-0.qza
		--p-no-exclude-ids
		--o-filtered-data filtered-data-0.qza
	)
	"${filter_seqs_cmd[@]}"

	# Run `qiime phylogeny align-to-tree-mafft-fasttree` on the filtered
	# sequences. Only rooted-tree-0.qza is consumed later in this script; the
	# alignment, masked alignment and tree outputs get 'XX'-prefixed names.
	# The argv is built as an array so each element reaches qiime as one word.
	align_to_tree_cmd=(
		qiime phylogeny align-to-tree-mafft-fasttree
		# input
		--i-sequences filtered-data-0.qza
		# parameters
		--p-n-threads 16
		--p-mask-max-gap-frequency 1.0
		--p-mask-min-conservation 0.4
		--p-no-parttree
		# outputs
		--o-rooted-tree rooted-tree-0.qza
		--o-alignment XX_alignment
		--o-masked-alignment XX_masked_alignment
		--o-tree XX_tree
	)
	"${align_to_tree_cmd[@]}"

	# Classify the filtered sequences with classifier-0.qza via
	# `qiime feature-classifier classify-sklearn`, writing classification-0.qza.
	# The argv is built as an array so each element reaches qiime as one word.
	classify_sklearn_cmd=(
		qiime feature-classifier classify-sklearn
		# inputs
		--i-reads filtered-data-0.qza
		--i-classifier classifier-0.qza
		# parameters
		--p-reads-per-batch auto
		--p-n-jobs 8
		--p-pre-dispatch '2*n_jobs'
		--p-confidence 0.7
		--p-read-orientation auto
		# output
		--o-classification classification-0.qza
	)
	"${classify_sklearn_cmd[@]}"

	# Replay attempts to represent metadata inputs accurately, but metadata .tsv
	# files are merged automatically by some interfaces, rendering distinctions
	# between file inputs invisible in provenance. We output the recorded
	# metadata to disk to enable visual inspection.

	# The following command may have received additional metadata .tsv files. To
	# confirm you have covered your metadata needs adequately, review the
	# original metadata, saved at
	# './recorded_metadata/diversity_core_metrics_phylogenetic_0/'

	# Run `qiime diversity core-metrics-phylogenetic` on the filtered table and
	# the rooted tree at a sampling depth of 4000.
	# '<your metadata filepath>' is a placeholder: replace it, angle brackets
	# included, with your metadata file. Left unedited, bash parses the '<' and
	# '>' as redirections instead of handing a path to qiime.
	# Only rarefied-table-0.qza and unweighted-unifrac-pcoa-results-0.qza are
	# consumed later in this script; every other output gets an 'XX'-prefixed
	# name (see item 6 of the header instructions).
	qiime diversity core-metrics-phylogenetic \
	--i-table filtered-table-0.qza \
	--i-phylogeny rooted-tree-0.qza \
	--p-sampling-depth 4000 \
	--m-metadata-file <your metadata filepath> \
	--p-no-with-replacement \
	--p-n-jobs-or-threads 16 \
	--o-rarefied-table rarefied-table-0.qza \
	--o-unweighted-unifrac-pcoa-results unweighted-unifrac-pcoa-results-0.qza \
	--o-faith-pd-vector XX_faith_pd_vector \
	--o-observed-features-vector XX_observed_features_vector \
	--o-shannon-vector XX_shannon_vector \
	--o-evenness-vector XX_evenness_vector \
	--o-unweighted-unifrac-distance-matrix XX_unweighted_unifrac_distance_matrix \
	--o-weighted-unifrac-distance-matrix XX_weighted_unifrac_distance_matrix \
	--o-jaccard-distance-matrix XX_jaccard_distance_matrix \
	--o-bray-curtis-distance-matrix XX_bray_curtis_distance_matrix \
	--o-weighted-unifrac-pcoa-results XX_weighted_unifrac_pcoa_results \
	--o-jaccard-pcoa-results XX_jaccard_pcoa_results \
	--o-bray-curtis-pcoa-results XX_bray_curtis_pcoa_results \
	--o-unweighted-unifrac-emperor XX_unweighted_unifrac_emperor \
	--o-weighted-unifrac-emperor XX_weighted_unifrac_emperor \
	--o-jaccard-emperor XX_jaccard_emperor \
	--o-bray-curtis-emperor XX_bray_curtis_emperor

	# Convert the rarefied table with `qiime feature-table relative-frequency`,
	# writing relative-frequency-table-0.qza.
	# The argv is built as an array so each element reaches qiime as one word.
	relative_frequency_cmd=(
		qiime feature-table relative-frequency
		--i-table rarefied-table-0.qza
		--o-relative-frequency-table relative-frequency-table-0.qza
	)
	"${relative_frequency_cmd[@]}"

	# Combine the unweighted UniFrac PCoA results with the relative-frequency
	# table via `qiime diversity pcoa-biplot`, writing biplot-0.qza.
	# The argv is built as an array so each element reaches qiime as one word.
	pcoa_biplot_cmd=(
		qiime diversity pcoa-biplot
		--i-pcoa unweighted-unifrac-pcoa-results-0.qza
		--i-features relative-frequency-table-0.qza
		--o-biplot biplot-0.qza
	)
	"${pcoa_biplot_cmd[@]}"

	# The following command may have received additional metadata .tsv files. To
	# confirm you have covered your metadata needs adequately, review the
	# original metadata, saved at
	# './recorded_metadata/empress_community_plot_0/'

	# Run `qiime empress community-plot` on the rooted tree, the
	# relative-frequency table and the biplot, with classification-0.qza supplied
	# as feature metadata; the result is written to visualization-0.qzv.
	# '<your metadata filepath>' is a placeholder: replace it, angle brackets
	# included, with your sample metadata file. Left unedited, bash parses the
	# '<' and '>' as redirections instead of handing a path to qiime.

	qiime empress community-plot \
	--i-tree rooted-tree-0.qza \
	--i-feature-table relative-frequency-table-0.qza \
	--i-pcoa biplot-0.qza \
	--m-sample-metadata-file <your metadata filepath> \
	--m-feature-metadata-file classification-0.qza \
	--p-no-ignore-missing-samples \
	--p-no-filter-extra-samples \
	--p-no-filter-missing-features \
	--p-number-of-features 5 \
	--p-shear-to-table \
	--o-visualization visualization-0.qzv


	###############################################################################
	# The following QIIME 2 Results were parsed to produce this script:
	# a1a46509-66fc-4719-b72b-eaa23443bed4
	###############################################################################