Created
May 23, 2020 19:07
-
-
Save yuenhsu/4d40b6c4ccc512e8884ff3c60ccdbfb6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmltodict | |
import json | |
import numpy as np | |
import pandas as pd | |
#Parse XML to dictionary | |
with open('export.xml') as f: | |
health = xmltodict.parse(f.read()) | |
#Extract desired data and turn into a Data Frame | |
data = pd.DataFrame(health['HealthData']['Record']) | |
#Create two Data Frames for distance and steps walked | |
distance = data[data['@type'] == 'HKQuantityTypeIdentifierDistanceWalkingRunning'][['@value','@creationDate','@startDate','@endDate']] | |
step = data[data['@type'] == 'HKQuantityTypeIdentifierStepCount'][['@value','@creationDate','@startDate','@endDate']] | |
#Turn the kilometers into float data type from object and others into datetime | |
distance['@value'] = distance['@value'].astype(float) | |
for column in distance.columns: | |
if column != '@value': | |
distance[column] = pd.to_datetime(distance[column]) | |
#Create a new column to get the time differences | |
distance['@walkHours'] = distance['@endDate'] - distance['@startDate'] | |
#Resample to aggregate data by day and merge kilometres and walking time dataframes | |
data = pd.merge( | |
left = distance.resample("D",on='@startDate')['@walkHours'].sum().reset_index(), | |
right = distance.resample("D",on='@startDate')['@value'].sum().reset_index(), | |
on = '@startDate', | |
how = 'outer', | |
indicator = False).rename(columns={'@value':'@kms'}) | |
#Same with step | |
step['@value'] = step['@value'].astype(int) | |
for column in step.columns: | |
if column != '@value': | |
step[column] = pd.to_datetime(step[column]) | |
#Merge with earlier data | |
data = data.merge( | |
step.resample("D",on='@startDate')['@value'].sum().reset_index(), | |
on='@startDate', | |
how='outer').rename(columns={'@value':'@steps'}) | |
#Turn HH:MM:SS into minutes. | |
data['@walkMins'] = round((data['@walkHours'].dt.total_seconds() / 60), 2) | |
data = data[data['@walkMins'] < 1440] | |
#Filter out incorrect entries | |
data['@startDate'] = data['@startDate'].dt.tz_localize(None) | |
#Export to Excel | |
data.to_excel('health.xlsx') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment