Skip to content

Instantly share code, notes, and snippets.

@fedarko
Last active August 30, 2019 06:25
Show Gist options
  • Save fedarko/49088da6bba5705f987192a954b2416f to your computer and use it in GitHub Desktop.
Save fedarko/49088da6bba5705f987192a954b2416f to your computer and use it in GitHub Desktop.
Adds an "age in years" column to a QIIME 2 sample metadata file
#! /usr/bin/env python3
from qiime2 import Metadata
from dateutil.parser import parse
from dateutil.relativedelta import relativedelta
# ---------------------------------------------------------------------------
# Adds a "subject_age_years" column to a sample metadata file.
#
# Reads "metadata.tsv", and for every sample whose "host_subject_id" equals
# subject_id, stores the subject's age in whole years at the sample's
# "collection_timestamp". All other samples get "not applicable". The result
# is written to "output_metadata.tsv".
#
# Raises ValueError if a matching sample was collected before the birthday.
# ---------------------------------------------------------------------------

# Configuration: replace these two placeholders before running the script.
# subject_birthday must be a date string that dateutil's parse() accepts.
subject_id = "HOST SUBJECT ID"
subject_birthday = "HOST BIRTHDAY"
subject_birthday_datetime = parse(subject_birthday)
age_col_name = "subject_age_years"
not_applicable = "not applicable"

m = Metadata.load("metadata.tsv")
m_df = m.to_dataframe()

# Every value written to this column is a string, so initialise it with the
# "not applicable" string up front. Starting from the integer 0 and then
# assigning strings would force pandas to change the column's dtype mid-loop.
m_df[age_col_name] = not_applicable

for sample_id in m_df.index:
    # We only compute age for samples with the specified host_subject_id;
    # all other rows keep the "not applicable" default.
    if m_df.loc[sample_id, "host_subject_id"] != subject_id:
        continue

    # Parse sample timestamp
    sample_timestamp = m_df.loc[sample_id, "collection_timestamp"]
    sample_datetime = parse(sample_timestamp)

    # Sanity check: the subject should never be negative years old.
    # Compare the datetimes directly instead of inspecting rd.years: the
    # years component is 0 for any gap shorter than one year in either
    # direction, so it can neither accept samples from the first year of
    # life nor reject samples taken shortly before the birthday.
    if sample_datetime < subject_birthday_datetime:
        raise ValueError(
            "Sample {} has a collection_timestamp, {}, occurring before "
            "the subject_birthday of {}.".format(
                sample_id, sample_timestamp, subject_birthday
            )
        )

    rd = relativedelta(sample_datetime, subject_birthday_datetime)
    # Stored as a string so the column holds a single value type alongside
    # the "not applicable" entries.
    m_df.loc[sample_id, age_col_name] = str(rd.years)

Metadata(m_df).save("output_metadata.tsv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment