Skip to content

Instantly share code, notes, and snippets.

@walterst
Last active March 15, 2019 10:11
Show Gist options
  • Save walterst/ca4a41d32cceba809c77b55fc2c068cc to your computer and use it in GitHub Desktop.
Custom script used to parse tab-delimited iPod survey data, match up dates from tab-delimited QIIME mapping data, and write averages of the data from the sample day and the days prior as metadata columns. This script uses a QIIME 1.9X environment for the parse_mapping_file function.
#!/usr/bin/env python
from __future__ import division
# USAGE: python parse_ipod_to_metadata.py mapping_file days_to_consider ipod_tab_delim_file raw_output_file qiime_compatible_output_file
# where days_to_consider counts the same day as one of the days, and the comma-separated columns need to be
# an exact match to the field label in the ipod data file, e.g. Gastrointestinal_issues
# All dates must be in the format of DD/MM/YY in the ipod source tab delimited data.
from sys import argv
from operator import itemgetter
from datetime import datetime, date
from numpy import array, mean
from qiime.parse import parse_mapping_file
# For now, a subset of the data, until all data can be quantified for parsing
# Exact column headers to pull from the iPod tab-delimited survey file.
# Order is significant: target_fields_lookup is defined positionally against
# this list, and the parsing loop stores each row's values in this order.
# "Time_Slept" is appended to this list later, after the raw fields are read.
target_fields = [
    # wake/bed times are kept as raw military-time strings
    "What time did you wake up today? (please use military time)",
    # meal portion questions (coded via portions_numeric)
    "How much of your breakfast did you eat?",
    "How much of your lunch did you eat?",
    "How much of it did you eat?",
    # beverage checkboxes (coded via presence_absence)
    "Bottled water",
    "Base purified water",
    "Tap water",
    "Soda",
    "Sports energy drink (e.g. Gatorade)",
    "Coffee or tea",
    "Milk",
    "Fruit juice",
    "Yogurt",
    "Beer, wine, or spirits",
    # wellness rating (coded via portions_numeric)
    "How are you feeling today?",
    # symptom checkboxes (coded via presence_absence)
    "Fever",
    "Gastrointestinal issues",
    "Headache",
    "Tiredness",
    "Runny or blocked nose",
    "Rash",
    "Muscle strain",
    "Cramp",
    # remaining quantified questions (coded via portions_numeric)
    "How much exercise did you do?",
    "How many times did you urinate today?",
    "How many times did you have a bowel movement today?",
    "Please describe the consistency of your stool.",
    # raw military-time string, like the wake-up time
    "What time did you get into bed before going to sleep today? (please use military time)"
]
# Survey answer string -> numeric code (as a string) used when averaging.
# A single shared dict serves several distinct questions because their answer
# vocabularies do not overlap.  All empty fields become "NA".
portions_numeric = {"": "NA"}
# What fraction was eaten at breakfast, lunch, or dinner
portions_numeric.update({
    "All of it": "4",
    "3/4 of it": "3",
    "1/2 of it": "2",
    "1/4 of it": "1",
})
# Reported wellness, from "How are you feeling today?"
portions_numeric.update({
    "Good": "3",
    "Ill": "2",
    "Very Ill": "1",
})
# Exercise duration
portions_numeric.update({
    "Less than 30 mins": "1",
    "30 mins to 1 hour": "2",
    "1 to 2 hours": "3",
    "2+ hours": "4",
})
# Urination frequency, coded roughly as the midpoint of each range
portions_numeric.update({
    "1-2": "1.5",
    "3-5": "4",
    "6-9": "7.5",
    "9+": "9",
})
# Bowel movement frequency: "0".."5" map to themselves, "6+" caps at "6"
portions_numeric.update((str(count), str(count)) for count in range(6))
portions_numeric["6+"] = "6"
# Stool consistency
portions_numeric.update({
    "Hard and formed (like a cigar)": "1",
    "Soft and formed (like peanut butter)": "2",
    "Loose and unformed (like a thick milkshake)": "3",
    "Liquid (like water)": "4",
})
# For categories with only one possible answer, code presence/absence:
# the answer string itself means "1", an empty field means "0".
_checkbox_items = (
    "Bottled water",
    "Base purified water",
    "Tap water",
    "Soda",
    "Sports energy drink (e.g. Gatorade)",
    "Coffee or tea",
    "Milk",
    "Fruit juice",
    "Yogurt",
    "Beer, wine, or spirits",
    "Fever",
    "Gastrointestinal issues",
    "Headache",
    "Tiredness",
    "Runny or blocked nose",
    "Rash",
    "Muscle strain",
    "Cramp",
)
presence_absence = dict((item, "1") for item in _checkbox_items)
presence_absence[""] = "0"
# Positional conversion table: entry i is the value-conversion dict applied
# to target_fields[i].  None means the raw string is kept (the wake-up and
# bedtime military-time fields).
target_fields_lookup = (
    [None]                      # wake-up time, raw
    + [portions_numeric] * 3    # breakfast / lunch / "How much of it" portions
    + [presence_absence] * 10   # beverage checkboxes
    + [portions_numeric]        # "How are you feeling today?"
    + [presence_absence] * 8    # symptom checkboxes
    + [portions_numeric] * 4    # exercise, urination, bowel, stool fields
    + [None]                    # bedtime, raw
)
# ---- Script setup: command-line arguments, parsed inputs, output handles ----
mapping_f = argv[1]
# parse_mapping_file comes from QIIME 1.9.x; the third returned element
# (comment lines) is unused here.
mapping_data, mapping_headers, _ = parse_mapping_file(open(mapping_f, 'U'))
# Number of days to average over; the sample day itself counts as one.
days_considered = int(argv[2])
ipod_f = open(argv[3], "U")
# Human-readable log of raw and averaged values.
raw_data_outf = open(argv[4], "w")
# QIIME-compatible mapping file; its header line starts with '#'.
qiime_data_outf = open(argv[5], "w")
qiime_data_outf.write("#")
# Column indices of target_fields in the ipod file, filled while parsing.
target_field_ixs = []
# Data will be loaded from the ipod touch form as a dictionary with tuple
# keys of (4 digit ID code, ordinal date int value) mapping to the list of
# strip()ed data from the tsv ipod columns named in target_fields.  The
# mapping-file loop below queries this dict for each id/date and treats
# KeyError as missing data.
ipod_data = {}
# 2 samples from Turkey in ipod survey, also extra PHR sample, ignoring these for now
ignore_ids = ["2002", "2004", "2005", "PHR"]
# (an unused `counter = 0` left over from debugging was removed here)
# ---- Parse the iPod survey file into ipod_data ----
# Keys are (user id, ordinal date); values are the converted field values in
# target_fields order.
for line in ipod_f:
    # Skip genuinely blank lines.  The original `len(curr_line) == 0` check
    # was dead: split("\t") never returns an empty list, so a blank line
    # would instead have crashed on the column indexing below.
    if not line.strip():
        continue
    curr_line = line.replace("\n", "").split("\t")
    if line.startswith("#"):
        # Header row: locate the user, corrected-date, and target columns.
        # (Assumes the header appears before any data rows -- TODO confirm.)
        User_ix = curr_line.index("User")
        date_ipod_ix = curr_line.index("CorrectedDateDDMMYY")
        for curr_field in target_fields:
            target_field_ixs.append(curr_line.index(curr_field))
        continue
    curr_id = curr_line[User_ix].strip()
    # If date is NA, or the id is in the skip list above, skip this row.
    if curr_line[date_ipod_ix].strip() == "NA" or curr_id in ignore_ids:
        continue
    curr_date = datetime.strptime(curr_line[date_ipod_ix].strip(),
        '%d/%m/%y').date().toordinal()
    curr_added_data = []
    # Pair each column index with its conversion dict; the two lists are
    # built in the same (target_fields) order.  This replaces the per-field
    # O(n) target_field_ixs.index() lookup of the original.
    for curr_lookup, field_ix in zip(target_fields_lookup, target_field_ixs):
        if curr_lookup:
            # Raises KeyError on an unrecognized survey answer (intentional).
            curr_val = curr_lookup[curr_line[field_ix].strip()]
        else:
            curr_val = curr_line[field_ix].strip()
        # Set empty fields to NA
        if len(curr_val) == 0:
            curr_val = "NA"
        curr_added_data.append(curr_val)
    ipod_data[(curr_id, curr_date)] = curr_added_data
date_mapping_ix = mapping_headers.index("SampleDate")
numeric_id_ix = mapping_headers.index("Numeric_ID")
# Keyed by (numeric id, ordinal date); values are filled below with the list
# of (id, date) tuples to query from the ipod data.  Rows with NA in either
# position are skipped.
id_dates_in_mapping = {}
# Same keys, but for the hours-slept calculation (needs one extra prior day).
id_dates_in_mapping_sleep = {}
# Keep each full metadata row so it can be written out with the ipod data.
metadata_line_in_mapping = {}
for row in mapping_data:
    if row[date_mapping_ix] == "NA" or row[numeric_id_ix] == "NA":
        continue
    sample_id = row[numeric_id_ix].strip()
    sample_day = datetime.strptime(row[date_mapping_ix].strip(),
        '%d/%m/%y').date().toordinal()
    map_key = (sample_id, sample_day)
    id_dates_in_mapping[map_key] = []
    id_dates_in_mapping_sleep[map_key] = []
    metadata_line_in_mapping[map_key] = row
# For every sample in the mapping file, list the (id, ordinal date) tuples to
# pull from the ipod data: the sample day itself plus the prior days, i.e.
# ordinals from curr_date down to curr_date - (days_considered - 1).
for map_key, query_list in id_dates_in_mapping.items():
    sample_id, sample_day = map_key
    for offset in range(days_considered):
        query_list.append((sample_id, sample_day - offset))
# Same windows but one day longer: the hours-slept value for a day needs the
# bedtime reported on the day before it.
for map_key, query_list in id_dates_in_mapping_sleep.items():
    sample_id, sample_day = map_key
    for offset in range(days_considered + 1):
        query_list.append((sample_id, sample_day - offset))
# Insert the new metadata headers into the mapping headers, just before the
# final (Description) column, which stays last.
target_fields.append("Time_Slept")
# Copy the header list: the original aliased mapping_headers and mutated it
# in place.  (The original also created a `raw_data` list sharing this same
# object that was never used anywhere; it has been dropped.)
corrected_headers = mapping_headers[:]
for curr_header in target_fields:
    corrected_headers.insert(-1, curr_header)
qiime_data = [corrected_headers]
# Try to calculate the hours slept for each day (based upon prior day's time
# to sleep), add to data as additional field.
# These index into each ipod_data value list: the wake-up time is the first
# target field and the bedtime is the last.
awake_ix = 0
asleep_ix = -1
# (id, ordinal date) -> hours slept formatted "%2.2f", or "NA" when either
# endpoint is missing.
ipod_sleeping_hours = {}
# NOTE(review): these two counters are never incremented or read anywhere in
# this script -- they look like leftover debugging state.
count_missing_firstix = 0
count_missing_secondix = 0
for curr_key in id_dates_in_mapping_sleep:
    """ Explanation here for this-the data are sorted backwards, with the newest day
    first in the list being indexed. To do the time-time comparison for sleeping and
    awakening, each day is going to be queried, along with the next one in the list,
    and if both data exist, then convert the datetime object with year/month/date/hour,
    using the caveat that if the value of the hour is after midnight but before 11:30,
    increment the day by 1"""
    # The oldest (last) entry has no "next day" to pair with, hence the -1.
    for curr_id_date_ix in range(len(id_dates_in_mapping_sleep[curr_key]) - 1):
        # Will often be empty, so have to do try/except commands
        try:
            # Wake-up time on the day being scored.
            curr_awake = ipod_data[id_dates_in_mapping_sleep[curr_key][curr_id_date_ix]][awake_ix]
        except KeyError:
            ipod_sleeping_hours[id_dates_in_mapping_sleep[curr_key][curr_id_date_ix]] = "NA"
            continue
        try:
            # Bedtime reported on the previous day (the next entry, since the
            # list runs newest-to-oldest).
            curr_asleep = ipod_data[id_dates_in_mapping_sleep[curr_key][curr_id_date_ix+1]][asleep_ix]
        except KeyError:
            ipod_sleeping_hours[id_dates_in_mapping_sleep[curr_key][curr_id_date_ix]] = "NA"
            continue
        # If either field has NA value, continue, should not be many of these
        if curr_awake == "NA" or curr_asleep == "NA":
            ipod_sleeping_hours[id_dates_in_mapping_sleep[curr_key][curr_id_date_ix]] = "NA"
            continue
        # Military-time bedtimes from 0000 through 1130 are treated as after
        # midnight, so the sleep date is advanced by one day below.
        if int(curr_asleep) >= 0 and int(curr_asleep) <= 1130:
            inc_date = 1
        else:
            inc_date = 0
        # Get the ordinal values back into year, month, day, correct the date if went to sleep in the AM
        datetime_awake = date.fromordinal(id_dates_in_mapping_sleep[curr_key][curr_id_date_ix][1])
        datetime_asleep = date.fromordinal(id_dates_in_mapping_sleep[curr_key][curr_id_date_ix+1][1] + inc_date)
        # Might be a more elegant way to handle this, but going to slice the time
        # based upon the number of digits present to get hours and minutes.
        # e.g. "2330" -> 23h 30m, "730" -> 7h 30m, "45" -> 0h 45m.
        if len(curr_awake) == 4:
            awake_hours = int(curr_awake[0:2])
            awake_mins = int(curr_awake[2:])
        elif len(curr_awake) == 3:
            awake_hours = int(curr_awake[0:1])
            awake_mins = int(curr_awake[1:])
        else:
            awake_hours = 0
            awake_mins = int(curr_awake)
        if len(curr_asleep) == 4:
            asleep_hours = int(curr_asleep[0:2])
            asleep_mins = int(curr_asleep[2:])
        elif len(curr_asleep) == 3:
            asleep_hours = int(curr_asleep[0:1])
            asleep_mins = int(curr_asleep[1:])
        else:
            asleep_hours = 0
            asleep_mins = int(curr_asleep)
        converted_awake = datetime(datetime_awake.year, datetime_awake.month,
            datetime_awake.day, awake_hours, awake_mins)
        converted_asleep = datetime(datetime_asleep.year, datetime_asleep.month,
            datetime_asleep.day, asleep_hours, asleep_mins)
        time_diff = converted_awake-converted_asleep
        # NOTE(review): timedelta.seconds ignores the .days component, so a
        # negative interval or one over 24 hours would silently yield a wrong
        # value -- presumably the inputs make that impossible; confirm.
        time_slept_hours = time_diff.seconds/3600
        # add the slept hours to the ipod data as another field, but, need to build up
        # this data and add it outside of this loop, since we're still reading in the
        # ipod touch date during this loop
        ipod_sleeping_hours[id_dates_in_mapping_sleep[curr_key][curr_id_date_ix]] = "%2.2f" % time_slept_hours
ipod_data_keys = set(ipod_data.keys())
# Attach the computed hours-slept value (or its "NA" placeholder) as one
# extra trailing field on every ipod_data record that actually exists; this
# is what the later averaging/transposition step expects.
for sleep_key, slept in ipod_sleeping_hours.items():
    if sleep_key in ipod_data_keys:
        ipod_data[sleep_key].append(slept)
# Now to query ipod data for each target id:date combo, average the numeric
# codes over the window, and append the averages to the metadata row.
for curr_key in id_dates_in_mapping:
    # Rows of ipod data found inside this sample's date window.
    target_vals = []
    # Human-readable dates actually found, for the raw log.
    queried_dates = []
    average_vals = []
    fill_empty_fields = True # If no data found in date range, use this to fill empty data in final mapping
    for curr_id_date in id_dates_in_mapping[curr_key]:
        # Will often be empty, so have to do try/except commands
        try:
            target_vals.append(ipod_data[curr_id_date])
        except KeyError:
            continue
        queried_dates.append("%s" % date.fromordinal(curr_id_date[1]))
        fill_empty_fields = False
    # Transpose data, needed to step through values, ignore NA or empty fields
    # NOTE: relies on Python 2 map() returning a list -- transposed_vals is
    # iterated here and then reused for the raw log write below.
    transposed_vals = map(list, zip(*target_vals))
    for n in transposed_vals:
        curr_vals = []
        for x in n:
            if x == "NA" or x == '':
                continue
            curr_vals.append(float(x))
        # If empty, put "NA" in the field, else put average of values
        if len(curr_vals) == 0:
            average_vals.append("NA")
        else:
            average_vals.append("%4.2f" % (mean(array(curr_vals))))
    if fill_empty_fields:
        # target_fields includes the appended "Time_Slept" here, matching the
        # per-record width after the sleep-hours merge above.
        average_vals = ["NA"] * len(target_fields)
    # Write out raw data to log file
    raw_data_outf.write("****************************\n")
    raw_data_outf.write("4 digit ID and date for current date: %s,%s\n" % (curr_key[0], date.fromordinal(curr_key[1])))
    raw_data_outf.write("Dates from IPOD data queried: %s \n" % queried_dates)
    raw_data_outf.write("Headers queried: %s \n" % ",".join(target_fields))
    raw_data_outf.write("Raw values for each category: %s \n" % transposed_vals)
    raw_data_outf.write("Averaged values for each category: %s \n" % average_vals)
    raw_data_outf.write("Mapping metadata line associated with the above values: %s \n" % "\t".join(metadata_line_in_mapping[curr_key]))
    # Add data to metadata lines, write out to qiime-formatted file.
    # insert(-1, ...) keeps the Description field last.
    curr_metadata_line = metadata_line_in_mapping[curr_key]
    for curr_average in average_vals:
        curr_metadata_line.insert(-1, curr_average)
    qiime_data.append(curr_metadata_line)
# Write the QIIME-compatible mapping file: one tab-joined line per row,
# following the "#" already written ahead of the header row.
for out_fields in qiime_data:
    qiime_data_outf.write("\t".join(out_fields))
    qiime_data_outf.write('\n')
# Close the handles explicitly so all output is flushed to disk (the original
# script relied on interpreter exit to do this).
qiime_data_outf.close()
raw_data_outf.close()
ipod_f.close()
# Note: list.insert(-1, value) above inserts each value before the final
# (Description) field; missing data were written as "NA".
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment