Skip to content

Instantly share code, notes, and snippets.

@boooeee
Last active June 29, 2021 05:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save boooeee/ab276ad9d4955de318863e6354e7ad8a to your computer and use it in GitHub Desktop.
Save boooeee/ab276ad9d4955de318863e6354e7ad8a to your computer and use it in GitHub Desktop.
Code for analyzing and charting your iPhone run data
rm(list = ls(all = TRUE))
library(XML)
library(tidyverse)
library(lubridate)
library(scales)
library(ggthemes)
library(ggridges)
library(rpart)
loc<-"" # enter file path location of export.xml file here #
xml <- xmlParse(paste0(loc, '/export.xml'))
rc <- data.frame(XML:::xmlAttrsToDataFrame(xml["//Record"]),stringsAsFactors = F)
# select Nike Run Club data #
nk<-rc %>%
filter(sourceName=="Nike Run Club",unit == "mi")
# format the data #
nk <- nk %>%
mutate(cdt=as_datetime(creationDate, tz="US/Pacific"),stm=as_datetime(startDate, tz="US/Pacific"),etm=as_datetime(endDate, tz="US/Pacific"),dst=as.numeric(as.character(value))) %>%
group_by(creationDate) %>%
mutate(cd=cumsum(dst)) %>% # cumulative distance covered #
mutate(mntm=min(stm)) %>% # start time for each run #%>% # time differential #
mutate(hr=hour(stm),min=minute(stm)) %>%
mutate(elt=as.numeric(etm-mntm)) %>% # total elapsed time #
mutate(dtm=as.numeric(etm-lag(etm))) %>%
ungroup()
# split each of your runs into a list element #
nkl<-split(nk,nk$creationDate)
# analyze your latest run #
rn<-nkl[[length(nkl)-0]]
# create loess smoothing of run data #
ls<-loess(cd ~ elt,data=rn,span=0.1)
# create smoothed data points and calculate speed #
rn <- rn %>%
mutate(prd = predict(ls,rn)) %>%
mutate(cdsm=prd-lag(prd), spd=cdsm/dtm, mph=spd*3600) %>%
mutate(rcdsm=cd-lag(cd), rspd=rcdsm/dtm, rmph=rspd*3600)
# plot your run #
# create plot title #
tt<-format(rn$cdt[1], format = "Your %B %d, %Y run")
# summary stats #
miles<-format(rn$cd[nrow(rn)],digits=2,nsmall=1)
time<-round(rn$elt[nrow(rn)]/60,0)
speed<-format(rn$cd[nrow(rn)]/rn$elt[nrow(rn)]*3600,digits=2,nsmall = 1)
pace<-format(rn$elt[nrow(rn)]/rn$cd[nrow(rn)]/60,digits=2,nsmall = 1)
# create subtitle #
sbt<-paste(miles," miles in ",time," minutes, ",pace," minutes per mile",sep="")
# speed over course of your run (the raw and the smoothed) #
ggplot(rn,aes(x=elt/60,y=mph)) +
geom_path(size=1.5) +
theme_minimal(base_size = 15) +
labs(x="minutes elapsed",y="miles per hour") +
theme(text=element_text(family="Open Sans")) +
ggtitle(tt,subtitle = sbt) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5)) +
geom_point(aes(x=elt/60,y=rmph),color="gray")
ggsave(filename=paste(loc,"speedrawsmoothed.png",sep=""))
# determine run intervals using the CART algorithm #
rp<-rpart(mph ~ elt,data=rn,control = rpart.control(cp=0.05)) # adjust the cp parameter if the phases are over or under fit #
rn$rpp<-predict(rp,rn)
# add intervals and lables to your run dataset #
rna<-rn %>%
group_by(rpp) %>%
summarise(mnt=min(elt),mxt=max(elt),mnd=min(prd),mxd=max(prd)) %>%
mutate(dst=mxd-mnd,avt=(mxt+mnt)/2) %>%
mutate(gt=paste(format(dst,digits = 1)," miles @ ",format(rpp,digits=2)," mph",sep=""))
# speed over course of your run - with interval summaries #
ggplot(rn,aes(x=elt/60,y=mph)) +
geom_point(color="gray",size=0.5) +
geom_line(aes(x=elt/60,y=rpp)) +
geom_label(data=rna,aes(x=avt/60,y=rpp,label=gt),color="red",nudge_y = 0.3) +
theme_minimal() +
labs(x="minutes elapsed",y="miles per hour") +
theme(text=element_text(family="Open Sans")) +
ggtitle(tt,subtitle = sbt) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5))
ggsave(filename=paste(loc,"speedphase.png",sep=""))
# distance over the course of your run - color coded speed #
ggplot(rn,aes(x=elt/60,y=prd)) +
geom_path(aes(color=mph),size=3) +
scale_color_gradient(low="blue",high="red",name="miles/hour") +
theme_minimal() +
labs(x="minutes elapsed",y="miles covered") +
theme(text=element_text(family="Open Sans")) +
ggtitle(tt,subtitle = sbt) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5))
ggsave(filename=paste(loc,"speedcolor.png",sep=""))
# plot speed for your last nine runs #
rns<-nkl[(length(nkl)-8):(length(nkl)-0)]
# function for creating loess smoothing for each run #
ra <- function(x) {
ls<-loess(cd ~ elt,data=x,span=0.1)
x<-x %>%
mutate(prd = predict(ls,x)) %>%
mutate(cdsm=c(NA,diff(prd)), spd=cdsm/dtm, mph=spd*3600)
return(x)
}
# apply function over the list of runs #
rnl<-lapply(rns,ra)
rnd<-do.call('rbind',rnl)
# create chart labels #
rnd$dtf<-format(rnd$cdt, format = "%B %d, %Y")
ru<-unique(rnd[c("cdt","dtf")])
ru<-ru[order(ru$cdt),]
rnd$dtf<-factor(rnd$dtf,levels=ru$dtf)
# speed over course of your run #
ggplot(rnd,aes(x=elt/60,y=mph)) +
geom_path() +
theme_minimal(base_size=15) +
facet_wrap(~dtf) +
ggtitle("Speed versus Time for Your Last Nine Runs") +
labs(x="minutes elapsed",y="miles per hour") +
theme(text=element_text(family="Open Sans")) +
#ggtitle(tt,subtitle = sbt) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5))
ggsave(filename=paste(loc,"speedtime9.png",sep=""))
# plot a distribution of when you run during the day #
ggplot(nk,aes(x=hr)) +
geom_bar(fill="blue") +
theme_minimal(base_size=15) +
labs(x="hour of the day") +
theme(text=element_text(family="Open Sans")) +
ggtitle("What time of day are you running?") +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5)) +
theme(axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank()) +
scale_x_continuous(labels=c(paste(c(5:11),"AM",sep=""),"12PM",paste(c(1:8),"PM",sep="")),breaks=c(5:20))
ggsave(filename=paste(loc,"timeofday.png",sep=""))
# summarize workouts -----
wk <- data.frame(XML:::xmlAttrsToDataFrame(xml["//Workout"]),stringsAsFactors = F)
# restrict by date #
dtfr<-"2020-07-01" # beginning of date range to analyze
dtto<-"2021-06-19" # end of date range to analyze
# remove speed outliers - set to very high number if you want to keep all data points #
mxnsd<-4 # threshhold for allowed number of standard deviations from mean (for workout speed) #
# split your runs into two time periods by choosing a cutoff date below #
cutoff<-"2021-01-01"
lbl1<-"Jul2020 to Dec2020" # label for dates prior to cutoff #
lbl2<-"Jan2021 to Jun2021" # label for dates after cutoff #
# summarize and clean workout data #
wkr<-wk %>%
mutate_at(c("duration","totalDistance","totalEnergyBurned"), as.numeric) %>%
mutate_at(c("creationDate","startDate","endDate"), as_datetime) %>%
mutate(speed=totalDistance/duration*60) %>%
filter(sourceName=="Nike Run Club") %>% # remove/alter for different run app #
filter(creationDate>=as_date(dtfr),creationDate<=as_date(dtto)) %>%
mutate(month=format(creationDate,format="%b")) %>%
mutate(nsd=abs(speed-mean(speed))/sd(speed)) %>% # calculate number of standard deviations from the mean #
filter(nsd<=mxnsd) %>%
mutate(lbl=ifelse(creationDate<as_datetime(cutoff),lbl1,lbl2))
# fit your runs to the Riegel formula #
wkr <- wkr %>%
mutate(lnv=log(speed),lnd=log(totalDistance))
# fit Riegel formula with fixed exponent of 0.06 #
lm1<-lm(I(lnv-0.06*lnd)~1,data=wkr)
# fit Riegel formula with best fit exponent #
lm2<-lm(lnv ~ lnd,data=wkr)
# fit each subsegment to its own Riegel line #
lm11<-lm(I(lnv+0.06*lnd)~1,data=wkr[wkr$lbl==lbl1,])
lm12<-lm(I(lnv+0.06*lnd)~1,data=wkr[wkr$lbl==lbl2,])
lm21<-lm(lnv ~ lnd,data=wkr[wkr$lbl==lbl1,])
lm22<-lm(lnv ~ lnd,data=wkr[wkr$lbl==lbl2,])
wkr <- wkr %>%
mutate(prd1=exp(predict(lm1,wkr)-0.06*lnd),prd2=exp(predict(lm2,wkr))) %>%
mutate(prd11=exp(predict(lm11,wkr)-0.06*lnd),prd12=exp(predict(lm12,wkr)-0.06*lnd)) %>%
mutate(prd21=exp(predict(lm21,wkr)),prd22=exp(predict(lm22,wkr))) %>%
mutate(dff1=speed-prd1,dff2=speed-prd2) %>% # identifying best runs - speed over expectation #
mutate(dftm1=totalDistance*(1/prd1-1/speed)*60,dftm2=totalDistance*(1/prd2-1/speed)*60) %>% # identifying best runs - seconds below expectation #
mutate(rnk1=rank(-dff1),rnk2=rank(-dff2),rnkt1=rank(-dftm1),rnkt2=rank(-dftm2))
wkr$lbl<-factor(wkr$lbl,levels=c(lbl1,lbl2))
# plot speed versus distance - Riegel formula with fixed exponent #
ggplot(wkr,aes(x=totalDistance,y=speed,color=lbl)) +
geom_point() +
scale_color_manual(values=c("red","blue")) +
theme_minimal() +
geom_line(aes(x=totalDistance,y=prd11),color="red") +
geom_line(aes(x=totalDistance,y=prd12),color="blue") +
theme(text=element_text(family="Open Sans")) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5)) +
labs(x="distance covered (miles)",y="speed (miles per hour)") +
theme(legend.title = element_blank()) +
ggtitle("Distance versus speed, fit to Riegel formula",subtitle = "Using fixed exponent - 1.06")
ggsave(filename=paste(loc,"riegel.png",sep=""))
# plot speed versus distance - Riegel formula with best fit exponent #
ggplot(wkr,aes(x=totalDistance,y=speed,color=lbl)) +
geom_point() +
scale_color_manual(values=c("red","blue")) +
theme_minimal() +
geom_line(aes(x=totalDistance,y=prd21),color="red") +
geom_line(aes(x=totalDistance,y=prd22),color="blue") +
theme(text=element_text(family="Open Sans")) +
theme(plot.title = element_text(hjust = 0.5)) +
theme(plot.subtitle = element_text(hjust = 0.5)) +
labs(x="distance covered (miles)",y="speed (miles per hour)") +
theme(legend.title = element_blank()) +
ggtitle("Distance versus speed, fit to Riegel formula",subtitle = "Exponent represents best fit to data")
ggsave(filename=paste(loc,"riegel_bestfit.png",sep=""))
# summarize your best runs in a gt table #
bm<-3.1 # best time at [bm] miles #
bf <-function(x) {
# create loess smoothing of run data #
ls<-loess(cd ~ elt,data=x,span=0.1)
x2 <- x %>%
mutate(prd = predict(ls,x)) %>%
mutate(cdsm=c(NA,diff(prd)), spd=cdsm/dtm, mph=spd*3600) %>%
mutate(rcdsm=c(NA,diff(cd)), rspd=rcdsm/dtm, rmph=rspd*3600) %>%
crossing(select(x,ncd=cd,nelt=elt,nstm=stm)) %>%
filter(ncd>=cd+bm) %>%
group_by(stm) %>%
slice_head(n=1) %>%
ungroup() %>%
mutate(dff=nelt-elt) %>%
arrange(elt) %>%
slice_head(n=1) %>%
select(cdt,stm,nstm,elt,nelt,cd,ncd,dff) %>%
mutate(mins=dff/60)
return(x2)
}
fnk<-lapply(nkl,bf)
fnkd<-bind_rows(fnk)
# sort by fastest runs #
fnkd<-arrange(fnkd,mins)
library(gt)
library(webshot) # load this library if you want to save your table as png file #
gtfst <- fnkd %>%
mutate(strt=elt/60,endt=nelt/60,spd=ncd/dff*3600,cdt=as_date(cdt)) %>%
select(-stm,-nstm,-cd,-elt,-nelt,-dff,-ncd) %>%
relocate(mins, .after=endt) %>%
slice_head(n=10) %>%
gt() %>%
tab_header(title=paste0("Your 10 fastest ",bm," mile runs")) %>%
fmt_date(columns=cdt, date_style=3) %>%
fmt_number(columns=c(mins,strt,endt),decimals = 1,pattern="{x} minutes") %>%
fmt_number(columns=spd,decimals=2,pattern="{x} mph") %>%
cols_label(cdt="workout date",mins="run time",strt="start time",endt="end time",spd="speed") %>%
cols_align(columns=c(strt:spd),align="center")
gtsave(gtfst,file="fastest_table.png", path = loc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment