Skip to content

Instantly share code, notes, and snippets.

@thiagomarzagao
Created February 25, 2020 02:44
Show Gist options
  • Save thiagomarzagao/f2fc571fa6ccd29bb87e14717d7ba65b to your computer and use it in GitHub Desktop.
Save thiagomarzagao/f2fc571fa6ccd29bb87e14717d7ba65b to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
from collections import Counter
from bs4 import BeautifulSoup
from scipy import stats
from matplotlib import pyplot as plt
# Location of the XML export to analyze (presumably an Apple Health
# export.xml, judging by the HK* identifiers queried below); edit before
# running.
path = '/path/to/export.xml'
# Decode explicitly instead of relying on the platform's locale encoding,
# which is not UTF-8 everywhere (e.g. on Windows).
# NOTE(review): assumes the export is UTF-8 encoded, the XML default -- confirm.
with open(path, encoding='utf-8') as f:
    raw = f.read()
# Parse the whole document once; every query below searches this tree.
soup = BeautifulSoup(raw, 'xml')
# Running workouts: sum duration and energy burned per day, where the day is
# the first whitespace-separated token of each workout's creationDate.
cnt_runn_min = Counter()
cnt_runn_cal = Counter()
tag_runn = soup.find_all('Workout', {'workoutActivityType': 'HKWorkoutActivityTypeRunning'})
for e in tag_runn:
    # NOTE(review): subscripting raises KeyError if a Workout element lacks
    # one of these attributes -- confirm the export always provides them.
    date = e['creationDate'].split()[0]
    runn_min = float(e['duration'])  # presumably minutes; depends on the export's durationUnit
    runn_cal = float(e['totalEnergyBurned'])
    cnt_runn_min[date] += runn_min
    cnt_runn_cal[date] += runn_cal

# Passing `columns=` to the constructor (rather than assigning df.columns
# afterwards) keeps this working when there are no running workouts: an empty
# frame then still has both columns and a header-only CSV is written.
df = pd.DataFrame(list(cnt_runn_min.items()), columns=['date', 'running_minutes'])
df.to_csv('running_minutes.csv', index=False)

df = pd.DataFrame(list(cnt_runn_cal.items()), columns=['date', 'running_calories'])
df.to_csv('running_calories.csv', index=False)
# "Lifting" workouts: sum duration and energy burned per day for workouts of
# type HKWorkoutActivityTypeOther (presumably how the author logs lifting
# sessions -- the export itself only says "Other").
cnt_lift_min = Counter()
cnt_lift_cal = Counter()
tag_lift = soup.find_all('Workout', {'workoutActivityType': 'HKWorkoutActivityTypeOther'})
for e in tag_lift:
    # NOTE(review): subscripting raises KeyError if a Workout element lacks
    # one of these attributes -- confirm the export always provides them.
    date = e['creationDate'].split()[0]
    lift_min = float(e['duration'])  # presumably minutes; depends on the export's durationUnit
    lift_cal = float(e['totalEnergyBurned'])
    cnt_lift_min[date] += lift_min
    cnt_lift_cal[date] += lift_cal

# Passing `columns=` to the constructor (rather than assigning df.columns
# afterwards) keeps this working when there are no such workouts: an empty
# frame then still has both columns and a header-only CSV is written.
df = pd.DataFrame(list(cnt_lift_min.items()), columns=['date', 'lifting_minutes'])
df.to_csv('lifting_minutes.csv', index=False)

df = pd.DataFrame(list(cnt_lift_cal.items()), columns=['date', 'lifting_calories'])
df.to_csv('lifting_calories.csv', index=False)
# Total energy per day: active and basal energy records are added into the
# same per-day counter, keyed by the date part of each record's creationDate.
cnt_calr = Counter()
tag_cal1 = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierActiveEnergyBurned'})
tag_cal2 = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierBasalEnergyBurned'})
# Active records are accumulated before basal ones, so days are first seen in
# the same order as before.
for tags in (tag_cal1, tag_cal2):
    for e in tags:
        date = e['creationDate'].split()[0]
        calr = float(e['value'])
        cnt_calr[date] += calr

# Passing `columns=` to the constructor (rather than assigning df.columns
# afterwards) keeps this working when no energy records exist: an empty frame
# then still has both columns and a header-only CSV is written.
df = pd.DataFrame(list(cnt_calr.items()), columns=['date', 'calories'])
df.to_csv('calories.csv', index=False)
# Steps per day, keyed by the date part of each record's creationDate.
cnt_step = Counter()
tag_step = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierStepCount'})
for e in tag_step:
    date = e['creationDate'].split()[0]
    step = int(e['value'])  # number of steps
    cnt_step[date] += step

# Passing `columns=` to the constructor (rather than assigning df.columns
# afterwards) keeps this working when no step records exist: an empty frame
# then still has both columns and a header-only CSV is written.
df = pd.DataFrame(list(cnt_step.items()), columns=['date', 'steps'])
df.to_csv('steps.csv', index=False)
# Walking + running distance per day, keyed by the date part of each record's
# creationDate.
cnt_dist = Counter()
tag_dist = soup.find_all('Record', {'type': 'HKQuantityTypeIdentifierDistanceWalkingRunning'})
for e in tag_dist:
    date = e['creationDate'].split()[0]
    dist = float(e['value'])  # distance in km (per the original author; depends on the export's unit)
    cnt_dist[date] += dist

# Passing `columns=` to the constructor (rather than assigning df.columns
# afterwards) keeps this working when no distance records exist: an empty
# frame then still has both columns and a header-only CSV is written.
df = pd.DataFrame(list(cnt_dist.items()), columns=['date', 'distance'])
df.to_csv('distance.csv', index=False)
# Per-day CSV files written earlier in this script; each one is filtered,
# smoothed and plotted in its own figure.
filenames = [
    'calories.csv',
    'distance.csv',
    'lifting_calories.csv',
    'lifting_minutes.csv',
    'running_calories.csv',
    'running_minutes.csv',
    'steps.csv',
]
for filename in filenames:
    df = pd.read_csv(filename)
    df['date'] = pd.to_datetime(df['date'])
    # The CSV rows are in the order each day was first encountered, which is
    # not guaranteed to be chronological. Sort by date so the row-based
    # rolling window and the line plot both follow time.
    df = df.set_index('date').sort_index()
    print(df)
    print(type(df))
    if not df.empty:
        # Drop outlier days: keep only rows whose value lies within three
        # standard deviations of the column mean.
        df = df[(np.abs(stats.zscore(df)) < 3).all(axis=1)]
    if df.empty:
        # Nothing survives (e.g. a single row or a constant column yields
        # NaN z-scores); df.plot() would raise on an empty frame.
        print('{}: no rows left to plot, skipping'.format(filename))
        continue
    # Smooth with a mean over 14 consecutive rows (days that have data, not
    # necessarily 14 calendar days).
    df = df.rolling(window=14).mean()
    df.plot()
    # Mark 2019-08-01 with a red vertical line.
    plt.axvline('2019-08-01', color='red')
    plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment