Skip to content

Instantly share code, notes, and snippets.

@joebrew
Created August 4, 2017 16:35
Show Gist options
  • Save joebrew/23838dd796b761c0f97569d2fadc7ddf to your computer and use it in GitHub Desktop.
Save joebrew/23838dd796b761c0f97569d2fadc7ddf to your computer and use it in GitHub Desktop.
Get lots of geographic activity data from the Strava API
library(tidyverse)
# https://github.com/fawda123/rStrava
library(rStrava)
library(yaml)
library(feather)
options(scipen = '999')
# Get access / authentication token for Strava's API
credentials <- yaml::yaml.load_file(input = 'credentials/credentials.yaml')
# Create the authentication token.
# The token cache (.httr-oauth) lives inside the credentials directory, so the
# working directory is switched there temporarily and always restored
# afterwards, even when token creation fails.
#  - cached token present: reuse it without any user interaction
#  - no cached token (first run): run the OAuth flow, which opens a browser,
#    and let cache = TRUE save the token for subsequent runs
old_wd <- setwd('credentials')
stoken <- tryCatch(
  {
    if (file.exists('.httr-oauth')) {
      httr::config(token = readRDS('.httr-oauth')[[1]])
    } else {
      httr::config(token = strava_oauth(app_name = credentials$category,
                                        app_client_id = credentials$client_id,
                                        app_secret = credentials$client_secret,
                                        cache = TRUE))
    }
  },
  finally = setwd(old_wd)
)
# Google API key (elevation, etc.).
# The key is read from the `google_key` environment variable. To store it in
# ~/.Renviron, run the following once (and only once):
# cat(paste0("google_key=",
#            credentials$google_key,
#            "\n"),
#     file = file.path(normalizePath("~/"), ".Renviron"),
#     append = TRUE)
mykey <- Sys.getenv(x = "google_key")
# Fetch the friends' activity list, then collect the id of every activity
friends_activities <- get_activity_list(
  stoken = stoken,
  # id = ids[239],
  friends = TRUE
)
activity_ids <- friends_activities %>%
  lapply(function(activity) activity$id) %>%
  unlist()
# Get the decoded Google Maps polyline for a single activity.
#
# activity_id: id of the Strava activity to look up.
#
# Fetches the activity with the global `stoken`, takes its summary polyline
# and returns whatever gepaf::decodePolyline() produces for it (the caller
# treats the result as a data frame with lat / lon columns).
polyate <- function(activity_id) {
  activity <- get_activity(id = activity_id, stoken = stoken)
  encoded <- activity$map$summary_polyline
  gepaf::decodePolyline(encoded)
}
# Wrapper for getting the lat/lon of multiple activities.
#
# For every id in `activity_ids` the route is fetched with polyate(), tagged
# with its activity id and appended to the accumulated data frame `df`.
#
# Arguments:
#   activity_ids: vector of Strava activity ids to request.
#   sleep:        seconds to pause after each stored activity.
#
# Side effects (kept from the original implementation):
#   - resumes from the data frame `df` in the global environment, if any, and
#     re-assigns the global `df` after every activity so that progress
#     survives an interrupted run
#   - rewrites 'data/data.feather' after every activity
#   - writes a timestamped backup under 'data/backups/' whenever the number of
#     distinct activity ids reaches a multiple of 5000
#
# Failed requests are stored as a single row with NA lat/lon so the same id is
# not queried again. Rate-limit failures (HTTP 429) are the exception: they
# are not stored, so the id is picked up again on a later run.
#
# Returns (invisibly) the accumulated data frame.
ll_from_activity_ids <- function(activity_ids = c(1110000567, 1110000568),
                                 sleep = 0) {
  # Start from the global `df` when it is a data frame. Without this guard a
  # missing global `df` resolves to the function stats::df and bind_rows()
  # fails on the very first activity.
  df <- get0('df', envir = .GlobalEnv, inherits = FALSE)
  if (!is.data.frame(df)) {
    df <- data.frame()
  }
  # Make sure both output directories exist before the first write
  dir.create('data/backups', recursive = TRUE, showWarnings = FALSE)

  total <- length(activity_ids)
  # seq_along() performs zero iterations for empty input (1:0 would run twice)
  for (i in seq_along(activity_ids)) {
    message(paste0(i, ' of ', total))
    this_activity_id <- activity_ids[i]
    # Left side of the join: guarantees one row per requested id, even when
    # the decoded polyline has no rows
    left <- data.frame(activity_id = this_activity_id)
    right <- tryCatch(
      polyate(activity_id = this_activity_id) %>%
        mutate(activity_id = this_activity_id),
      error = function(e) e
    )
    if (inherits(right, 'error')) {
      failure <- conditionMessage(right)
      message('Activity ', this_activity_id, ' failed: ', failure)
      if (grepl('429', failure, fixed = TRUE)) {
        # Rate limited: wait, then move on without recording this id
        message('Sleeping for 1 minute beginning at ',
                Sys.time())
        Sys.sleep(60)
        next
      }
      right <- data.frame(lat = NA_real_,
                          lon = NA_real_,
                          activity_id = this_activity_id)
    }
    done <- left_join(x = left,
                      y = right,
                      by = 'activity_id')
    df <- bind_rows(df, done)
    assign('df',
           df,
           envir = .GlobalEnv)
    write_feather(df, 'data/data.feather')
    # Every 5000 distinct activities, save a new backup. The timestamp is
    # formatted without spaces or colons so the name is valid on every OS.
    if (length(unique(df$activity_id)) %% 5000 == 0) {
      backup_name <- paste0(format(Sys.time(), '%Y-%m-%d_%H-%M-%S'),
                            '.feather')
      write_feather(df, file.path('data/backups', backup_name))
    }
    Sys.sleep(sleep)
  }
  invisible(df)
}
# Load the activities collected so far.
# If there is no previously existing data, start from an empty data frame and
# seed it with one activity id. ll_from_activity_ids() stores its accumulated
# results in the global `df` and in data/data.feather itself, so its return
# value is not used here and no separate write is needed.
if (file.exists('data/data.feather')) {
  df <- read_feather('data/data.feather')
} else {
  df <- data.frame()
  ll_from_activity_ids(activity_ids = 1110000567)
}
# Candidate ids to request: 1110000000 counting down to 1100000000
activity_ids <- 1110000000:(1110000000 - 10000000)
# Filter down to those not already in my database
# (df$activity_id is NULL for an empty data frame, which keeps every id)
activity_ids <- activity_ids[!activity_ids %in% df$activity_id]
# Pause between requests, in seconds.
# To spread the requests over 24 hours instead, use:
# time_between <- (24 * 60 * 60) / length(activity_ids)
time_between <- 1.2
# Get new data
ll_from_activity_ids(activity_ids = activity_ids,
                     sleep = time_between)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment