@gregcaporaso · Created January 26, 2023 19:29
#!/usr/bin/env python
# -----------------------------------------------------------------------------
# Auto-generated by provenance_lib v.0.2.0 at 09:49:51 AM on 24 Jan, 2023
# This document is a representation of the scholarly work of the creator of the
# QIIME 2 Results provided as input to provenance_lib, and may be protected by
# intellectual property law. Please respect all copyright restrictions and
# licenses governing the use, modification, and redistribution of this work.
# For User Support, post to the Community Plugin Support channel of the QIIME 2
# Forum: https://forum.qiime2.org
# Documentation/issues: https://github.com/qiime2/provenance_lib
# UUIDs of all target QIIME 2 Results are shown at the end of the file
# Instructions for use:
# 1. Open this script in a text editor or IDE. Support for Python
#    syntax highlighting is helpful.
# 2. Search or scan visually for '<' or '>' characters to find places where
#    user input (e.g. a filepath or column name) is required. If syntax
#    highlighting is enabled, '<' and '>' will appear as syntax errors. (An
#    illustrative example follows this header.)
# 3. Search for 'FIXME' comments in the script, and respond as directed.
# 4. Remove all 'FIXME' comments from the script completely. Failure to do so
#    may result in 'Missing Option' errors.
# 5. Adjust the arguments to the commands below to suit your data and metadata.
#    If your data is not identical to that in the replayed analysis,
#    changes may be required (e.g. sample IDs or rarefaction depth).
# 6. Optional: search for 'SAVE' comments in the script, commenting out the
#    `some_result.save` lines for any Results you do not want saved to disk.
# 7. Activate your replay conda environment, and confirm you have installed all
#    plugins used by the script.
# 8. Run this script with `python <path to this script>`, or paste commands
#    into a Python interpreter or Jupyter notebook for an interactive analysis.
# -----------------------------------------------------------------------------
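# Illustrative example (not from the recorded provenance; the filepath and
# source format below are hypothetical): after step 2, a filled-in import
# might look like the following. Substitute your own files.
#
#   feature_data_taxonomy_0 = Artifact.import_data(
#       'FeatureData[Taxonomy]',
#       'ref-taxonomy.tsv',  # hypothetical filepath to your reference taxonomy
#       view_type='HeaderlessTSVTaxonomyFormat',  # optional, if your file
#   )                                             # lacks a header row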
from qiime2 import Artifact
from qiime2 import Metadata
import qiime2.plugins.cutadapt.actions as cutadapt_actions
import qiime2.plugins.dada2.actions as dada2_actions
import qiime2.plugins.diversity.actions as diversity_actions
import qiime2.plugins.empress.actions as empress_actions
import qiime2.plugins.feature_classifier.actions as feature_classifier_actions
import qiime2.plugins.feature_table.actions as feature_table_actions
import qiime2.plugins.phylogeny.actions as phylogeny_actions
feature_data_taxonomy_0 = Artifact.import_data(
    'FeatureData[Taxonomy]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
feature_data_taxonomy_0.save('feature_data_taxonomy_0')
feature_data_sequence_0 = Artifact.import_data(
    'FeatureData[Sequence]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
feature_data_sequence_0.save('feature_data_sequence_0')
sample_data_sequences_with_quality_0 = Artifact.import_data(
    'SampleData[SequencesWithQuality]',
    <your data here>,
)
# SAVE: comment out the following with '# ' to skip saving this Result to disk
sample_data_sequences_with_quality_0.save('sample_data_sequences_with_quality_0')
classifier_0, = feature_classifier_actions.fit_classifier_naive_bayes(
    reference_reads=feature_data_sequence_0,
    reference_taxonomy=feature_data_taxonomy_0,
    classify__alpha=0.001,
    classify__chunk_size=20000,
    classify__class_prior='null',
    classify__fit_prior=False,
    feat_ext__alternate_sign=False,
    feat_ext__analyzer='char_wb',
    feat_ext__binary=False,
    feat_ext__decode_error='strict',
    feat_ext__encoding='utf-8',
    feat_ext__input='content',
    feat_ext__lowercase=True,
    feat_ext__n_features=8192,
    feat_ext__ngram_range='[7, 7]',
    feat_ext__norm='l2',
    feat_ext__preprocessor='null',
    feat_ext__stop_words='null',
    feat_ext__strip_accents='null',
    feat_ext__token_pattern='(?u)\\b\\w\\w+\\b',
    feat_ext__tokenizer='null',
    verbose=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
classifier_0.save('classifier_0')
trimmed_sequences_0, = cutadapt_actions.trim_single(
    demultiplexed_sequences=sample_data_sequences_with_quality_0,
    cores=15,
    adapter=['CCGTCAATTCMTTTRAGT...CTGCTGCCTCCCGTAGG'],
    error_rate=0.1,
    indels=True,
    times=1,
    overlap=3,
    match_read_wildcards=False,
    match_adapter_wildcards=True,
    minimum_length=1,
    discard_untrimmed=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
trimmed_sequences_0.save('trimmed_sequences_0')
table_0, representative_sequences_0, _ = dada2_actions.denoise_pyro(
    demultiplexed_seqs=trimmed_sequences_0,
    trunc_len=150,
    trim_left=0,
    max_ee=2.0,
    trunc_q=2,
    max_len=0,
    pooling_method='independent',
    chimera_method='consensus',
    min_fold_parent_over_abundance=1.0,
    allow_one_off=False,
    n_threads=0,
    n_reads_learn=250000,
    hashed_feature_ids=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
representative_sequences_0.save('representative_sequences_0')
table_0.save('table_0')
filtered_table_0, = feature_table_actions.filter_features(
    table=table_0,
    min_frequency=0,
    min_samples=2,
    exclude_ids=False,
    filter_empty_samples=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
filtered_table_0.save('filtered_table_0')
filtered_data_0, = feature_table_actions.filter_seqs(
    data=representative_sequences_0,
    table=filtered_table_0,
    exclude_ids=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
filtered_data_0.save('filtered_data_0')
_, _, _, rooted_tree_0 = phylogeny_actions.align_to_tree_mafft_fasttree(
    sequences=filtered_data_0,
    n_threads=16,
    mask_max_gap_frequency=1.0,
    mask_min_conservation=0.4,
    parttree=False,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
rooted_tree_0.save('rooted_tree_0')
classification_0, = feature_classifier_actions.classify_sklearn(
    reads=filtered_data_0,
    classifier=classifier_0,
    reads_per_batch='auto',
    n_jobs=8,
    pre_dispatch='2*n_jobs',
    confidence=0.7,
    read_orientation='auto',
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
classification_0.save('classification_0')
# Replay attempts to represent metadata inputs accurately, but metadata .tsv
# files are merged automatically by some interfaces, rendering distinctions
# between file inputs invisible in provenance. We output the recorded metadata
# to disk to enable visual inspection.
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at
# './recorded_metadata/diversity_core_metrics_phylogenetic_0/'
# NOTE: You may substitute already-loaded Metadata for the following, or cast a
# pandas.DataFrame to Metadata as needed.
metadata_0_md = Metadata.load(<your metadata filepath>)
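# Illustrative alternative (hypothetical column name and sample IDs): instead
# of loading a .tsv, an in-memory pandas.DataFrame can be cast to Metadata.
# QIIME 2 expects the sample IDs in the DataFrame index.
#
#   import pandas as pd
#   df = pd.DataFrame(
#       {'body-site': ['gut', 'tongue']},  # hypothetical metadata column
#       index=pd.Index(['sample-1', 'sample-2'], name='id'),
#   )
#   metadata_0_md = Metadata(df)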
action_results = diversity_actions.core_metrics_phylogenetic(
    table=filtered_table_0,
    phylogeny=rooted_tree_0,
    sampling_depth=4000,
    metadata=metadata_0_md,
    with_replacement=False,
    n_jobs_or_threads=16,
)
rarefied_table_0 = action_results.rarefied_table
unweighted_unifrac_pcoa_results_0 = action_results.unweighted_unifrac_pcoa_results
# SAVE: comment out the following with '# ' to skip saving Results to disk
rarefied_table_0.save('rarefied_table_0')
unweighted_unifrac_pcoa_results_0.save('unweighted_unifrac_pcoa_results_0')
relative_frequency_table_0, = feature_table_actions.relative_frequency(
    table=rarefied_table_0,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
relative_frequency_table_0.save('relative_frequency_table_0')
biplot_0, = diversity_actions.pcoa_biplot(
    pcoa=unweighted_unifrac_pcoa_results_0,
    features=relative_frequency_table_0,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
biplot_0.save('biplot_0')
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at './recorded_metadata/empress_community_plot_0/'
# NOTE: You may substitute already-loaded Metadata for the following, or cast a
# pandas.DataFrame to Metadata as needed.
sample_metadata_0_md = Metadata.load(<your metadata filepath>)
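# Illustrative example (hypothetical filepaths): if the original analysis
# merged multiple metadata .tsv files, already-loaded Metadata objects can be
# combined with Metadata.merge before being passed below.
#
#   sample_metadata_0_md = Metadata.load('run1-metadata.tsv').merge(
#       Metadata.load('run2-metadata.tsv'))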
# The following command may have received additional metadata .tsv files. To
# confirm you have covered your metadata needs adequately, review the original
# metadata, saved at './recorded_metadata/empress_community_plot_0/'
classification_0_a_0_md = classification_0.view(Metadata)
visualization_0_viz, = empress_actions.community_plot(
    tree=rooted_tree_0,
    feature_table=relative_frequency_table_0,
    pcoa=biplot_0,
    sample_metadata=sample_metadata_0_md,
    feature_metadata=classification_0_a_0_md,
    ignore_missing_samples=False,
    filter_extra_samples=False,
    filter_missing_features=False,
    number_of_features=5,
    shear_to_table=True,
)
# SAVE: comment out the following with '# ' to skip saving Results to disk
visualization_0_viz.save('visualization_0_viz')
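# Once saved, the visualization can be opened with `qiime tools view
# visualization_0_viz.qzv` or by dragging the .qzv file onto
# https://view.qiime2.org (`.save` appends the .qzv extension automatically).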
# -----------------------------------------------------------------------------
# The following QIIME 2 Results were parsed to produce this script:
# a1a46509-66fc-4719-b72b-eaa23443bed4
# -----------------------------------------------------------------------------