fedarko/convert_timestamp_to_days_elapsed.py

## convert_timestamp_to_days_elapsed.py
#! /usr/bin/env python3
from qiime2 import Metadata
from dateutil.parser import parse

# Load the sample metadata and work on it as a pandas DataFrame.
m = Metadata.load("metadata-with-age-and-ordinal-timestamp.tsv")
m_df = m.to_dataframe()

# Parse every collection timestamp exactly once, in the same order as
# m_df.index, so the results can be reused for both the minimum and the
# per-sample offsets (and written back positionally below).
parsed_dates = [parse(ts) for ts in m_df["collection_timestamp"]]

# Compute earliest date (None only if there are no samples at all).
min_date = min(parsed_dates, default=None)

print("Earliest date is {}".format(min_date))

# Assign "days from first timestamp" metric for each sample
# (the sample(s) taken on min_date should have a value of 0, and samples taken
# exactly a day later would have a value of 1, ...)
# There is some inherent imprecision here due to different levels of precision
# in sample collection (e.g. down to the day vs. down to the minute), but this
# should be sufficient for exploratory visualization.
#
# Note the avoidance of relativedelta -- see
# https://stackoverflow.com/a/48262147/10730311
# Subtracting two datetimes yields a timedelta, whose .days attribute is the
# whole number of elapsed days.
m_df["days_since_first_day"] = [
    (parsed_date - min_date).days for parsed_date in parsed_dates
]

Metadata(m_df).save("metadata-with-age-and-fancy-dates.tsv")
	#! /usr/bin/env python3
	from qiime2 import Metadata
	from dateutil.parser import parse

	# Load the sample metadata and work on it as a pandas DataFrame.
	m = Metadata.load("metadata-with-age-and-ordinal-timestamp.tsv")
	m_df = m.to_dataframe()

	# Parse every collection timestamp exactly once, in the same order as
	# m_df.index, so the results can be reused for both the minimum and the
	# per-sample offsets (and written back positionally below).
	parsed_dates = [parse(ts) for ts in m_df["collection_timestamp"]]

	# Compute earliest date (None only if there are no samples at all).
	min_date = min(parsed_dates, default=None)

	print("Earliest date is {}".format(min_date))

	# Assign "days from first timestamp" metric for each sample
	# (the sample(s) taken on min_date should have a value of 0, and samples taken
	# exactly a day later would have a value of 1, ...)
	# There is some inherent imprecision here due to different levels of precision
	# in sample collection (e.g. down to the day vs. down to the minute), but this
	# should be sufficient for exploratory visualization.
	#
	# Note the avoidance of relativedelta -- see
	# https://stackoverflow.com/a/48262147/10730311
	# Subtracting two datetimes yields a timedelta, whose .days attribute is the
	# whole number of elapsed days.
	m_df["days_since_first_day"] = [
		(parsed_date - min_date).days for parsed_date in parsed_dates
	]

	Metadata(m_df).save("metadata-with-age-and-fancy-dates.tsv")