Created
March 15, 2023 20:47
-
-
Save grcatlin/803a3db0bdd8f380e675c83e50d5dd9c to your computer and use it in GitHub Desktop.
Apple Watch Data in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(XML) | |
library(data.table) | |
library(lubridate) | |
library(ggplot2) | |
library(stringr) | |
library(leaflet) | |
# import and divide xml file ----------------------------------------------
# Parse the Apple Health export once; every table below is pulled from it.
xml_dat <- xmlParse("Data/apple_health_export/export.xml")

# Health records: build a table from the attributes of all <Record> nodes.
# Note that xmlAttrsToDataFrame is reached with `:::`, i.e. it is not part of
# the XML package's exported interface.
health_dat <- as.data.table(XML:::xmlAttrsToDataFrame(xml_dat["//Record"]))
# Convert both timestamp columns to date-times in the America/Denver zone.
health_dat[, `:=`(
  startDate = ymd_hms(startDate, tz = "America/Denver"),
  endDate = ymd_hms(endDate, tz = "America/Denver")
)]

# Workouts: same treatment for all <Workout> nodes.
workout_dat <- as.data.table(XML:::xmlAttrsToDataFrame(xml_dat["//Workout"]))
workout_dat[, `:=`(
  startDate = ymd_hms(startDate, tz = "America/Denver"),
  endDate = ymd_hms(endDate, tz = "America/Denver")
)]

# Activity summaries: these carry a plain date rather than a timestamp.
summary_dat <- as.data.table(
  XML:::xmlAttrsToDataFrame(xml_dat["//ActivitySummary"])
)
summary_dat[, dateComponents := ymd(dateComponents)]
# what records can we view? -----------------------------------------------
# Distinct values of the `type` column in the health records.
unique(health_dat[["type"]])
# Distinct values of the `workoutActivityType` column in the workouts.
unique(workout_dat[["workoutActivityType"]])
# mindfulness + HR data subset --------------------------------------------
# Mindfulness sessions: keep only the start/end timestamps, since the session
# length derived from them is the only quantity used from these records.
mindful_dat <- health_dat[
  type == "HKCategoryTypeIdentifierMindfulSession",
  .(startDate, endDate)
]
# Session length as a plain number of minutes.
mindful_dat[
  ,
  sessionLength := as.numeric(
    difftime(endDate, startDate, units = "mins"),
    units = "mins"
  )
]
# Sequential session identifier. seq_len() is used instead of 1:.N so that an
# export without any mindfulness sessions yields an empty column rather than
# an error (1:0 is c(1, 0), which cannot be assigned to zero rows).
mindful_dat[, sessionID := seq_len(.N)]

# Heart rate records: timestamps plus the reading converted to numeric.
heart_dat <- health_dat[
  type == "HKQuantityTypeIdentifierHeartRate",
  .(startDate, endDate, HR = as.numeric(value))
]
# subset further to records within ± 30 minutes of session, not including workouts
# One list slot per mindfulness session; the pieces are bound together once at
# the end instead of growing a table inside the loop.
hr_sessions <- vector("list", nrow(mindful_dat))
for (record in seq_len(nrow(mindful_dat))) {
  # the session being processed
  mindful_record <- mindful_dat[record]

  # actual session bounds, and the same bounds widened by 30 minutes each side
  actual <- c(mindful_record$startDate, mindful_record$endDate)
  plusminus <- c(actual[1] - minutes(30), actual[2] + minutes(30))

  # heart rate records that start or end inside the widened window
  hr_record <- heart_dat[
    startDate %between% plusminus | endDate %between% plusminus
  ]
  # 1 when the record starts or ends inside the session itself, else 0
  hr_record[, duringSession := ifelse(startDate %between% actual |
                                        endDate %between% actual, 1, 0)]

  # Flag records that start or end inside ANY workout. The flag is accumulated
  # with `|` across workouts; assigning it afresh on each iteration would keep
  # only the result for the last workout in workout_dat.
  in_workout <- rep(FALSE, nrow(hr_record))
  for (workout in seq_len(nrow(workout_dat))) {
    workout_interval <- c(
      workout_dat$startDate[workout],
      workout_dat$endDate[workout]
    )
    in_workout <- in_workout | hr_record[
      ,
      startDate %between% workout_interval | endDate %between% workout_interval
    ]
  }
  # keep only records outside every workout (which() also drops NA flags)
  hr_record <- hr_record[which(!in_workout)]

  # label with the session the records belong to
  hr_record[, sessionID := mindful_record$sessionID]

  # save
  hr_sessions[[record]] <- hr_record
}
hr_mind <- rbindlist(hr_sessions)
# mindful HR statistics & viz ---------------------------------------------
# Boxplot of heart rate split by the duringSession flag; used before and after
# the outlier filter below.
plot_hr_by_session <- function(dat) {
  ggplot(dat, aes(x = duringSession, y = HR, fill = duringSession)) +
    geom_boxplot() +
    theme_minimal()
}

# treat the 0/1 session flag as a categorical variable
hr_mind[, duringSession := as.factor(duringSession)]

# first look at the distributions
plot_hr_by_session(hr_mind)

# within each duringSession group, keep only readings strictly below that
# group's 95th percentile
hr_mind <- hr_mind[
  ,
  .SD[HR < quantile(HR, probs = 0.95)],
  by = duringSession
]

# distributions after the filter
plot_hr_by_session(hr_mind)

# linear model of heart rate on the session flag
mod <- lm(HR ~ duringSession, data = hr_mind)
summary(mod)
# map a hike --------------------------------------------------------------
# get hikes
hikes <- workout_dat[workoutActivityType == "HKWorkoutActivityTypeHiking"]
if (nrow(hikes) == 0L) {
  stop("No hiking workouts found in workout_dat.", call. = FALSE)
}

# Date of the most recent hike. max() is taken first so the result is a single
# date even when several rows share the latest endDate.
hike_date <- as_date(max(hikes$endDate))

# Locate the .gpx route file whose name contains that date.
route_dir <- "Data/apple_health_export/workout-routes"
route_list <- list.files(route_dir)
route <- str_which(route_list, as.character(hike_date))
if (length(route) == 0L) {
  stop(
    "No route file in '", route_dir, "' matches ", hike_date, ".",
    call. = FALSE
  )
}
if (length(route) > 1L) {
  # Several routes recorded on the same date: the parser needs exactly one
  # path, so use the first match and say so.
  warning(
    "Multiple route files match ", hike_date, "; using '",
    route_list[route[1L]], "'.",
    call. = FALSE
  )
  route <- route[1L]
}
filename <- file.path(route_dir, route_list[route])

# The GPX file is read with the HTML parser.
# NOTE(review): presumably so the plain "//trkpt" XPath queries below work
# without declaring the GPX namespace — confirm before switching parsers.
hike_gpx <- htmlTreeParse(file = filename, useInternalNodes = TRUE)

# get coords & elevation
coords <- xpathSApply(hike_gpx, path = "//trkpt", fun = xmlAttrs)
elevation <- xpathSApply(hike_gpx, path = "//trkpt/ele", fun = xmlValue)
if (length(coords) == 0L) {
  stop("No track points found in '", filename, "'.", call. = FALSE)
}

# create data.table (coords has one column per track point, with the
# attribute names as row names)
hike_route <- data.table(
  LAT = as.numeric(coords["lat", ]),
  LON = as.numeric(coords["lon", ]),
  ELEVATION = as.numeric(elevation)
)

# leaflet: draw the route as a line over the default tiles
leaflet() %>%
  addTiles() %>%
  addPolylines(
    data = hike_route,
    lat = ~ LAT,
    lng = ~ LON,
    color = "#AE2573"
  )

# ggplot elevation; TIME is just the track-point index, not a clock time
hike_route[, TIME := seq_len(.N)]
ggplot(hike_route, aes(x = TIME, y = ELEVATION)) +
  geom_line(linewidth = 2) +
  theme_minimal()
# view standing hours -----------------------------------------------------
# Scatter of stand hours per summary date with a smoothed trend on top.
# appleStandHours is converted to numeric inside the aesthetic mapping.
ggplot(
  summary_dat,
  aes(x = dateComponents, y = as.numeric(appleStandHours))
) +
  geom_point(color = "#7cb7a3", size = 2) +
  geom_smooth(color = "#AE2573", fill = "#AE2573") +
  labs(x = "Date", y = "Standing Hours") +
  theme_minimal()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This code accompanies a blog post, found here 💻