Skip to content

Instantly share code, notes, and snippets.

@grcatlin
Created March 15, 2023 20:47
Show Gist options
  • Save grcatlin/803a3db0bdd8f380e675c83e50d5dd9c to your computer and use it in GitHub Desktop.
Save grcatlin/803a3db0bdd8f380e675c83e50d5dd9c to your computer and use it in GitHub Desktop.
Apple Watch Data in R
library(XML)
library(data.table)
library(lubridate)
library(ggplot2)
library(stringr)
library(leaflet)
# import and divide xml file ----------------------------------------------
# Parse the export once; every table below is extracted from this document
# with an XPath query.
xml_dat <- xmlParse("Data/apple_health_export/export.xml")

# Timestamp columns shared by the record and workout tables.
time_cols <- c("startDate", "endDate")

# records: attributes of all <Record> nodes, timestamps parsed into
# America/Denver date-times
health_dat <- as.data.table(XML:::xmlAttrsToDataFrame(xml_dat["//Record"]))
health_dat[
  ,
  (time_cols) := lapply(.SD, ymd_hms, tz = "America/Denver"),
  .SDcols = time_cols
]

# workouts: attributes of all <Workout> nodes, same timestamp treatment
workout_dat <- as.data.table(XML:::xmlAttrsToDataFrame(xml_dat["//Workout"]))
workout_dat[
  ,
  (time_cols) := lapply(.SD, ymd_hms, tz = "America/Denver"),
  .SDcols = time_cols
]

# activity summaries: attributes of all <ActivitySummary> nodes; the only
# date field is dateComponents, parsed as a plain date
summary_dat <- as.data.table(
  XML:::xmlAttrsToDataFrame(xml_dat["//ActivitySummary"])
)
summary_dat[, dateComponents := ymd(dateComponents)]
# what records can we view? -----------------------------------------------
# distinct record types present in the health table
unique(health_dat[["type"]])
# distinct activity types present in the workout table
unique(workout_dat[["workoutActivityType"]])
# mindfulness + HR data subset --------------------------------------------
# Mindful sessions: filter to the mindful-session record type and keep only
# the two timestamps, since duration is the only meaningful item.
mindful_dat <- health_dat[
  type == "HKCategoryTypeIdentifierMindfulSession",
  .(startDate, endDate)
]
# session length as a plain number of minutes
mindful_dat[, sessionLength := as.numeric(
  difftime(endDate, startDate, units = "mins"),
  units = "mins"
)]
# running id, one per session row
mindful_dat[, sessionID := 1:.N]

# Heart rate: filter to the heart-rate record type, keep the timestamps and
# the reading itself converted to numeric (column HR).
heart_dat <- health_dat[
  type == "HKQuantityTypeIdentifierHeartRate",
  .(startDate, endDate, HR = as.numeric(value))
]
# subset further to records within ± 30 minutes of session, not including workouts
# For each mindful session:
#   1. take the heart-rate records whose start or end falls inside
#      [session start - 30 min, session end + 30 min];
#   2. set duringSession to 1 when the record's start or end falls inside the
#      session itself, 0 otherwise;
#   3. drop every record whose start or end falls inside ANY workout;
#   4. tag what is left with the session's sessionID.
# hr_mind is the per-session results stacked into one data.table.
window_pad <- minutes(30)

# One slot per session; filled in the loop and bound once at the end instead
# of growing hr_mind with rbind() on every iteration.
session_hr <- vector("list", nrow(mindful_dat))

# seq_len() so that an empty mindful_dat simply skips the loop
for (record in seq_len(nrow(mindful_dat))) {
  # current session and its two intervals
  mindful_record <- mindful_dat[record]
  actual <- c(mindful_record$startDate, mindful_record$endDate)
  plusminus <- c(
    mindful_record$startDate - window_pad,
    mindful_record$endDate + window_pad
  )

  # heart-rate records touching the padded window
  hr_record <- heart_dat[
    startDate %between% plusminus | endDate %between% plusminus
  ]
  hr_record[, duringSession := ifelse(
    startDate %between% actual | endDate %between% actual, 1, 0
  )]

  # Accumulate the workout flag with OR over all workouts. Re-assigning the
  # flag on each pass would leave only the last workout's result in effect.
  in_workout <- rep(FALSE, nrow(hr_record))
  for (workout in seq_len(nrow(workout_dat))) {
    workout_interval <- c(
      workout_dat$startDate[workout],
      workout_dat$endDate[workout]
    )
    in_workout <- in_workout |
      hr_record$startDate %between% workout_interval |
      hr_record$endDate %between% workout_interval
  }
  hr_record <- hr_record[!in_workout]

  # label session
  hr_record[, sessionID := mindful_record$sessionID]

  # save
  session_hr[[record]] <- hr_record
}

hr_mind <- rbindlist(session_hr)
# mindful HR statistics & viz ---------------------------------------------
# treat duringSession as a categorical variable for plotting and modelling
hr_mind[, duringSession := as.factor(duringSession)]

# Boxplot of HR by duringSession. Wrapped in a helper because the same plot
# is drawn once before and once after trimming.
hr_boxplot <- function(dat) {
  ggplot(dat, aes(x = duringSession, y = HR, fill = duringSession)) +
    geom_boxplot() +
    theme_minimal()
}

# before trimming
hr_boxplot(hr_mind)

# remove outliers: within each duringSession group keep only readings
# strictly below that group's 95th percentile
hr_mind <- hr_mind[
  ,
  .SD[HR < quantile(HR, probs = 0.95)],
  by = duringSession
]

# after trimming
hr_boxplot(hr_mind)

# linear model of HR on the session indicator
mod <- lm(HR ~ duringSession, data = hr_mind)
summary(mod)
# map a hike --------------------------------------------------------------
# all hiking workouts
hikes <- workout_dat[workoutActivityType == "HKWorkoutActivityTypeHiking"]

# date on which the most recently finished hike ended
hike_date <- as_date(hikes[endDate == max(endDate), endDate])

# Find the exported route file whose name contains that date and parse it.
# NOTE(review): if no file or several files match the date, `filename` is not
# a single path — confirm this cannot happen for the export being analysed.
route_dir <- "Data/apple_health_export/workout-routes/"
route_list <- list.files(route_dir)
route <- which(str_detect(route_list, as.character(hike_date)))
filename <- paste0(route_dir, route_list[route])
# NOTE(review): the HTML parser is used on a .gpx file; presumably so the
# plain "//trkpt" XPath below works without namespace handling — confirm.
hike_gpx <- htmlTreeParse(file = filename, useInternalNodes = TRUE)

# attributes of every track point (indexed by "lat" / "lon" rows below) and
# the text of each point's <ele> child
coords <- xpathSApply(hike_gpx, "//trkpt", xmlAttrs)
elevation <- xpathSApply(hike_gpx, "//trkpt/ele", xmlValue)

# one row per track point, everything converted to numeric
point_lat <- as.numeric(coords["lat", ])
point_lon <- as.numeric(coords["lon", ])
hike_route <- data.table(
  LAT = point_lat,
  LON = point_lon,
  ELEVATION = as.numeric(elevation)
)
# leaflet
# Build the map step by step: empty widget, tile layer, then the hike drawn
# as a polyline from the LAT / LON columns. The last line displays it.
hike_map <- leaflet()
hike_map <- addTiles(hike_map)
hike_map <- addPolylines(
  hike_map,
  data = hike_route,
  lat = ~ LAT,
  lng = ~ LON,
  color = "#AE2573"
)
hike_map
# ggplot elevation
# TIME is just the row index of each track point, used as the x axis.
hike_route[, TIME := 1:.N]
elevation_plot <- ggplot(
  data = hike_route,
  mapping = aes(x = TIME, y = ELEVATION)
)
elevation_plot +
  geom_line(linewidth = 2) +
  theme_minimal()
# view standing hours -----------------------------------------------------
# Stand hours per activity-summary date: points plus a smoothed trend.
# appleStandHours is converted to numeric inside the mapping.
stand_mapping <- aes(x = dateComponents, y = as.numeric(appleStandHours))
ggplot(data = summary_dat, mapping = stand_mapping) +
  geom_point(color = "#7cb7a3", size = 2) +
  geom_smooth(color = "#AE2573", fill = "#AE2573") +
  labs(x = "Date", y = "Standing Hours") +
  theme_minimal()
@grcatlin
Copy link
Author

This code accompanies a blog post, found here 💻

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment