Created
May 3, 2017 18:46
-
-
Save drw/580d466ef9be2099ced4db2f427e3953 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script shows how to get data from a private CKAN repository
# using the datastore_search API endpoint. It then pulls the data
# records out of the JSON response and casts fields to particular
# types, renaming and manipulating some along the way.

library(jsonlite)
library(httr)

# authentication.R should define CKAN_API_key.
# Keeping the key in a separate file makes it easy to keep it out of
# your git repository: just add the line
#
#     authentication.R
#
# to your .gitignore file before you add any other files under git.
source("authentication.R")
# Send a GET request to `url` with an Authorization header and parse the
# JSON body of the reply.
#
# url:     the address to request.
# API_key: the value sent in the Authorization request header.
#
# Returns whatever fromJSON() builds from the response body text.
# Note: this function dies with an error when there is no Internet
# connection.
authorize_json_request <- function(url, API_key) {
  auth_header <- httr::add_headers(Authorization = API_key)
  response <- httr::GET(url, auth_header)
  body_text <- httr::content(response, as = "text")
  fromJSON(body_text)
}
# CKAN API pitfalls: Always set the limit parameter in the query.
# Otherwise you're just going to wind up with the default number
# of results (100).
# Pull WPRDC site statistics from a dedicated data repository on wprdc.org
# and tidy them into typed columns.
#
# The records are requested through authorize_json_request() using the
# global CKAN_API_key, then the fields are cast to numeric/integer types
# and renamed to lower-case column names.
#
# Returns a data frame with the columns "Year+month", users, sessions,
# pageviews, "pageviews per session", "average session duration (minutes)",
# year and month. Returns NULL (after issuing a warning) when the request
# fails or the response does not contain a table of records.
get_site_stats <- function() {
  # The limit parameter is set explicitly; without it the query returns
  # only the default number of results.
  json_file <- "https://data.wprdc.org/api/action/datastore_search?resource_id=865441c9-498a-4a3f-8f52-3a865c1c421a&limit=9999"

  # Previously the code tested exists("json_data") right after assigning
  # json_data, which is always TRUE, so the NULL fallback was unreachable
  # and any request failure aborted the function. Catch the failure here
  # so the fallback actually works.
  records <- tryCatch(
    authorize_json_request(json_file, CKAN_API_key)$result$records,
    error = function(e) {
      warning(
        "Could not retrieve site stats: ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )

  if (is.null(records)) {
    return(NULL)
  }
  if (!is.data.frame(records)) {
    # E.g. an empty record set, which does not parse into a data frame.
    warning("Site stats response contained no usable records.", call. = FALSE)
    return(NULL)
  }

  site_stats <- records

  # Session duration arrives in seconds; report it in minutes.
  duration_seconds <- as.numeric(site_stats[["Average session duration (seconds)"]])
  site_stats[["average session duration (minutes)"]] <-
    round(duration_seconds / 60, digits = 2)

  site_stats[["pageviews per session"]] <-
    round(as.numeric(site_stats[["Pageviews per session"]]), digits = 2)

  site_stats$users <- as.integer(site_stats[["Users"]])
  site_stats$pageviews <- as.integer(site_stats[["Pageviews"]])
  site_stats$sessions <- as.integer(site_stats[["Sessions"]])

  # Keep only the converted columns (plus the raw period label), in a
  # fixed order.
  site_stats <- site_stats[c(
    "Year+month", "users", "sessions", "pageviews",
    "pageviews per session", "average session duration (minutes)"
  )]

  # Split the period label into numeric parts: characters 1-4 become the
  # year and 5-6 the month (presumably a YYYYMM string; verify against
  # the data source).
  # site_stats$`year/month` <- paste(substring(site_stats$`Year+month`,1,4),substring(site_stats$`Year+month`,5,6),sep='/')
  year_month <- site_stats[["Year+month"]]
  site_stats$year <- as.integer(substring(year_month, 1, 4))
  site_stats$month <- as.integer(substring(year_month, 5, 6))

  site_stats
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment