Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Load the Apple HealthKit export.xml file into R, then analyze and visualize the steps data. See the full post here: http://www.ryanpraski.com/apple-health-data-how-to-export-analyze-visualize-guide/
library(dplyr)
library(ggplot2)
library(lubridate)
library(XML)
# Load the Apple Health export.xml file and build the record-level data
# frame `df` used by every analysis step below.
export_path <- "C:\\Users\\praskry\\Desktop\\apple_health_data\\export.xml"
# Fail early with a clear message when the export is missing: given a path
# that does not exist, xmlParse() tries to treat the string itself as XML
# text and reports a confusing "does not seem to be XML" error instead.
if (!file.exists(export_path)) {
  stop("Apple Health export file not found: ", export_path, call. = FALSE)
}
xml <- xmlParse(export_path)
# Transform the XML document into a data frame: select the Record nodes and
# turn their attributes into columns. xmlAttrsToDataFrame() is not exported
# by the XML package, hence the `:::` access.
df <- XML:::xmlAttrsToDataFrame(xml["//Record"])
# Make the value column numeric. Going through as.character() first means a
# factor column is converted by its labels rather than its level codes.
df$value <- as.numeric(as.character(df$value))
# Parse endDate into a POSIXct date-time with lubridate, in the US Eastern
# time zone.
df$endDate <- ymd_hms(df$endDate, tz = "America/New_York")
# Add month, year, date, day-of-week and hour columns derived from endDate.
# month/year/date/hour are character strings produced by format().
df$month <- format(df$endDate, "%m")
df$year <- format(df$endDate, "%Y")
df$date <- format(df$endDate, "%Y-%m-%d")
df$dayofweek <- wday(df$endDate, label = TRUE, abbr = FALSE)
df$hour <- format(df$endDate, "%H")
# Inspect the final structure once, after all columns are in place.
str(df)
# Total steps per month for each year: print the table with dplyr, then draw
# a dodged bar chart (one bar per year within each month) with ggplot2.
df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  group_by(year, month) %>%
  summarise(steps = sum(value)) %>%
  # Show up to 100 rows of the monthly table; the table is then piped on to
  # ggplot() as the plot data.
  print(n = 100) %>%
  ggplot(mapping = aes(x = month, y = steps, fill = year)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(labels = scales::comma) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Distribution of daily step totals per month, one box per year.
df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  # One row per calendar day; month and year are carried along as grouping
  # columns so they remain available to the plot.
  group_by(date, month, year) %>%
  summarise(steps = sum(value)) %>%
  # Show up to 100 rows of the daily table before plotting it.
  print(n = 100) %>%
  ggplot(mapping = aes(x = month, y = steps)) +
  geom_boxplot(mapping = aes(fill = year)) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Summary statistics of daily step totals, by month, for 2015.
df %>%
  # year is a character column (created with format()), so compare it with a
  # string rather than relying on implicit numeric-to-character coercion.
  # Filtering before the aggregation avoids summing days of other years that
  # would be thrown away afterwards; the result is unchanged.
  filter(type == "HKQuantityTypeIdentifierStepCount", year == "2015") %>%
  # First collapse the records to one total per calendar day ...
  group_by(date, month, year) %>%
  summarize(steps = sum(value)) %>%
  # ... then describe the distribution of those daily totals within each month.
  group_by(month) %>%
  summarize(
    mean = round(mean(steps), 2),
    sd = round(sd(steps), 2),
    median = round(median(steps), 2),
    max = round(max(steps), 2),
    min = round(min(steps), 2),
    `25%` = quantile(steps, probs = 0.25),
    `75%` = quantile(steps, probs = 0.75)
  )
# Distribution of daily step totals per day of week, one box per year.
df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  # One row per calendar day, keeping its weekday and year for the plot.
  group_by(dayofweek, date, year) %>%
  summarise(steps = sum(value)) %>%
  # Show up to 100 rows of the table of steps by day of week, date and year.
  print(n = 100) %>%
  ggplot(mapping = aes(x = dayofweek, y = steps)) +
  geom_boxplot(mapping = aes(fill = year)) +
  scale_fill_brewer() +
  theme_bw() +
  theme(panel.grid.major = element_blank())
# Summary statistics of daily step totals, by day of week, for 2015,
# ordered from the highest median to the lowest.
df %>%
  # year is a character column (created with format()), so compare it with a
  # string rather than relying on implicit numeric-to-character coercion.
  # Filtering before the aggregation avoids summing days of other years that
  # would be thrown away afterwards; the result is unchanged.
  filter(type == "HKQuantityTypeIdentifierStepCount", year == "2015") %>%
  # First collapse the records to one total per calendar day ...
  group_by(dayofweek, date, year) %>%
  summarize(steps = sum(value)) %>%
  # ... then describe the distribution of those daily totals per weekday.
  group_by(dayofweek) %>%
  summarize(
    mean = round(mean(steps), 2),
    sd = round(sd(steps), 2),
    median = round(median(steps), 2),
    max = round(max(steps), 2),
    min = round(min(steps), 2),
    `25%` = quantile(steps, probs = 0.25),
    `75%` = quantile(steps, probs = 0.75)
  ) %>%
  # `median` here is the summary column computed just above.
  arrange(desc(median))
# Heatmap of total steps by day of week (x) and hour of day (y).
df %>%
  filter(type == "HKQuantityTypeIdentifierStepCount") %>%
  # Sum directly to one total per (hour, day of week). A per-date
  # pre-aggregation would only be summed away again and does not change
  # the totals.
  group_by(hour, dayofweek) %>%
  summarize(steps = sum(value)) %>%
  # summarize() leaves the result grouped by hour; drop the grouping so the
  # sort below orders the whole table regardless of dplyr version.
  ungroup() %>%
  arrange(desc(steps)) %>%
  # Print the table of steps by hour and day of week, busiest slots first
  # (up to 100 rows), then pass it on as the plot data.
  print(n = 100) %>%
  ggplot(aes(x = dayofweek, y = hour, fill = steps)) +
  geom_tile() +
  scale_fill_continuous(labels = scales::comma, low = "white", high = "red") +
  theme_bw() +
  theme(panel.grid.major = element_blank())
@deepankardatta

This comment has been minimized.

Copy link

deepankardatta commented Dec 26, 2017

Hi. Liking your work. Was wondering if you were keen to help create an actual CRAN-level package to help analyse Apple Health data?

(see https://github.com/deepankardatta/AppleHealthAnalysis)

@benjaminwnelson

This comment has been minimized.

Copy link

benjaminwnelson commented Feb 21, 2018

Great work! Do you know how to format the data into an interday file that has summary variables for steps, resting heart rate, etc., as well as an intraday file that has heart rate, steps, etc. every 10 minutes?

@Aminaba2016

This comment has been minimized.

Copy link

Aminaba2016 commented Apr 8, 2019

How can I export the data to an XML file?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.