Skip to content

Instantly share code, notes, and snippets.

@fedarko
Last active July 25, 2019 05:51
Show Gist options
  • Save fedarko/591eb47cc5f12b4ef8a6fbb3c9da5462 to your computer and use it in GitHub Desktop.
Save fedarko/591eb47cc5f12b4ef8a6fbb3c9da5462 to your computer and use it in GitHub Desktop.
Filter a QIIME 2 feature table and a metadata TSV file to shared samples
#! /usr/bin/env python3
import biom
from qiime2 import Artifact, Metadata
# Filter a QIIME 2 feature table and a sample metadata file down to the
# samples present in both, then write the filtered copies to
# "filtered-table.qza" and "filtered-metadata.tsv".
#
# Edit the two paths below to point at your own files.
YOUR_QZA_TABLE_FILEPATH_GOES_HERE = "table.qza"
YOUR_METADATA_FILEPATH_GOES_HERE = "metadata.tsv"

# Load the table artifact and view it as a biom.Table
tbl_qza = Artifact.load(YOUR_QZA_TABLE_FILEPATH_GOES_HERE)
t = tbl_qza.view(biom.Table)

# Load the sample metadata
md = Metadata.load(YOUR_METADATA_FILEPATH_GOES_HERE)

# How many samples are in the table, and how many are in the metadata?
print("{} samples are in the table.".format(t.shape[1]))
print("{} samples are in the sample metadata.".format(len(md.ids)))

shared_ids = set(t.ids()) & set(md.ids)
print(
    "{} samples are shared between the table and sample metadata.".format(
        len(shared_ids)
    )
)

# Stop early, before the table is modified, when nothing overlaps. Without
# this guard the failure would come from the filtering calls below with a
# message that does not point at the real cause (mismatched sample IDs).
if not shared_ids:
    raise ValueError(
        "No samples are shared between the table and sample metadata; "
        "check that the sample IDs in the two files match."
    )

# Actually filter the table and metadata to the shared IDs.
# The table is filtered in place along its sample axis (these keyword values
# are biom's defaults, spelled out for clarity); filter_ids() returns a new
# Metadata object, so its result is rebound to md.
t.filter(shared_ids, axis="sample", inplace=True)
md = md.filter_ids(shared_ids)

# Small sanity checks that this worked
if t.shape[1] != len(md.ids):
    raise ValueError("Number of samples differs. Filtering didn't work.")
if set(t.ids()) != set(md.ids):
    raise ValueError("Some samples still not shared. Filtering didn't work.")

# Write out the filtered table and metadata
tbl_qza_filtered = Artifact.import_data("FeatureTable[Frequency]", t)
tbl_qza_filtered.save("filtered-table.qza")
md.save("filtered-metadata.tsv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment