Skip to content

Instantly share code, notes, and snippets.

@drw
Created May 3, 2017 18:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save drw/580d466ef9be2099ced4db2f427e3953 to your computer and use it in GitHub Desktop.
Save drw/580d466ef9be2099ced4db2f427e3953 to your computer and use it in GitHub Desktop.
# This script shows how to get data from a private CKAN repository
# using the datastore_search API endpoint. It then pulls the data
# records out of the JSON response and casts fields to particular
# types, renaming and manipulating some along the way.
library(jsonlite)
library(httr)
source("authentication.R") # This file should define CKAN_API_key.
# By putting this in a separate file, you can easily prevent
# it from being added to your git repository. (Just add the line
#
# authentication.R
#
# to your .gitignore file before you add any other files under git.)
# Perform a GET request with an Authorization header and parse the JSON reply.
#
# Args:
#   url:     Full URL of the API endpoint to query.
#   API_key: Value sent in the "Authorization" request header.
#
# Returns:
#   The response body as parsed by jsonlite::fromJSON().
#
# This function dies with an error when there is no Internet connection,
# and also when the server answers with an HTTP error status (4xx/5xx).
authorize_json_request <- function(url, API_key) {
  req <- httr::GET(url, httr::add_headers(Authorization = API_key))

  # Fail loudly on an HTTP error instead of handing an error payload back to
  # the caller as if it were data (e.g. when the API key is rejected).
  httr::stop_for_status(req)

  # Name the encoding explicitly: JSON is UTF-8, and leaving it out makes httr
  # guess and emit a "No encoding supplied" message.
  json <- httr::content(req, as = "text", encoding = "UTF-8")

  jsonlite::fromJSON(json)
}
# CKAN API pitfalls: Always set the limit parameter in the query.
# Otherwise you're just going to wind up with the default number
# of results (100).
# Pull WPRDC stats from a dedicated data repository on wprdc.org.
#
# Queries the datastore_search endpoint (with an explicit limit), extracts the
# records from the JSON response, and casts/renames the fields.
#
# Returns:
#   A data frame with the columns, in this order:
#     "Year+month", "users", "sessions", "pageviews",
#     "pageviews per session", "average session duration (minutes)",
#     "year", "month"
#   or NULL (with a warning when the request itself failed) if no records
#   could be obtained.
get_site_stats <- function() {
  json_file <- "https://data.wprdc.org/api/action/datastore_search?resource_id=865441c9-498a-4a3f-8f52-3a865c1c421a&limit=9999"

  # A failed request raises an error inside authorize_json_request(); turn
  # that into a warning plus NULL so the NULL return path is reachable.
  json_data <- tryCatch(
    authorize_json_request(json_file, CKAN_API_key),
    error = function(e) {
      warning(
        "Unable to fetch site stats: ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  # Walk down to the records defensively: an error-shaped or empty payload
  # has no data frame under result$records.
  result <- if (is.list(json_data)) json_data[["result"]] else NULL
  records <- if (is.list(result)) result[["records"]] else NULL
  if (!is.data.frame(records)) {
    return(NULL)
  }

  required <- c(
    "Year+month", "Users", "Sessions", "Pageviews",
    "Pageviews per session", "Average session duration (seconds)"
  )
  missing_cols <- setdiff(required, names(records))
  if (length(missing_cols) > 0) {
    stop(
      "Site stats records are missing expected column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Use [[ ]] throughout: `$` partial-matches names on data frames, which is
  # risky when columns differ only by capitalisation or a prefix.
  year_month <- records[["Year+month"]]

  site_stats <- records["Year+month"]
  site_stats[["users"]] <- as.integer(records[["Users"]])
  site_stats[["sessions"]] <- as.integer(records[["Sessions"]])
  site_stats[["pageviews"]] <- as.integer(records[["Pageviews"]])
  site_stats[["pageviews per session"]] <- round(
    as.numeric(records[["Pageviews per session"]]),
    digits = 2
  )
  # Source field is in seconds; report minutes rounded to two decimals.
  site_stats[["average session duration (minutes)"]] <- round(
    as.numeric(records[["Average session duration (seconds)"]]) / 60,
    digits = 2
  )

  # "Year+month" is split positionally: characters 1-4 are taken as the year
  # and characters 5-6 as the month.
  site_stats[["year"]] <- as.integer(substring(year_month, 1, 4))
  site_stats[["month"]] <- as.integer(substring(year_month, 5, 6))

  site_stats
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment