First hack at trying to write some R functions to wrap the OpenSpending API
require(RCurl)
require(rjson)
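#NOTE: the RCurl and rjson packages are assumed to be installed already;
##if not, something like the following should fetch them from CRAN:
#install.packages(c('RCurl','rjson'))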
#Starting to hack some routines around the OpenSpending API
#to try to make it a little easier to play with the data using R
#get datasets
#USAGE:
##datasets.json=okf.os.getDatasetsJSON()
##gbdatasets.json=okf.os.getDatasetsJSON(territories='gb')
#This function is not terribly helpful
#We need to pass the JSON to another function for parsing...
okf.os.getDatasetsJSON=function(territories=NA,languages=NA){
  url='http://openspending.org/datasets.json?'
  #Filtering dataset lookup by territory is allowed.
  #The filter seems to be case sensitive, so go defensive
  if (!is.na(territories)) url=paste(url,'territories=',toupper(territories),sep='')
  #As is filtering by language - should we force tolower here?
  if (!is.na(languages)) url=paste(url,'&languages=',languages,sep='')
  rawdata=getURL(url)
  #We probably need to be able to handle paging for large numbers of results?
  ##Which is to say - it may well be that not all datasets are returned atm...
  fromJSON(rawdata)
}
#EXAMPLE: get a list of the first DEFAULT(?) number of datasets
datasets.json=okf.os.getDatasetsJSON()
#EXAMPLE: get a list of datasets relating to GB territory
gbdatasets.json=okf.os.getDatasetsJSON(territories='GB')
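#SKETCH: a quick way of peeking at what came back (assuming the parsed list
##contains 'datasets' and 'territories' elements, as used by the helpers below):
#str(datasets.json, max.level=1)
#length(datasets.json$datasets)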
#Get something more useful out of the datasets JSON result
#For example, what territories are covered by the listed datasets?
#USAGE:
##territories.df=okf.os.getTerritoriesFromJSON(datasets.json)
okf.os.getTerritoriesFromJSON=function(jsondata){
  #initialise a data frame to hold one row per territory
  territories.df <- data.frame(count=numeric(),
                               url=character(),
                               label=character(),
                               code=character(),
                               stringsAsFactors=FALSE)
  jtr=jsondata$territories
  for (i in 1:length(jtr)){
    territories.df=rbind(territories.df,
                         data.frame(
                           count=jtr[[i]]$count,
                           url=jtr[[i]]$url,
                           label=jtr[[i]]$label,
                           code=jtr[[i]]$code,
                           stringsAsFactors=FALSE
                         )
    )
  }
  territories.df
}
#EXAMPLE: generate a dataframe identifying the territories covered by
##the datasets listed in the datasets JSON.
territories.df=okf.os.getTerritoriesFromJSON(datasets.json)
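#SKETCH: one possible use of the territories dataframe - rank territories by
##how many datasets cover them (using the column names defined above):
#territories.df[order(-territories.df$count),]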
#Usage:
##gbdatasets.df=okf.os.getDatasetsFromJSON(gbdatasets.json)
okf.os.getDatasetsFromJSON=function(jsondata){
  #Initialise datasets dataframe
  datasets.df <- data.frame(
    name=character(),
    label=character(),
    category=character(),
    description=character(),
    currency=character(),
    html_url=character(),
    #In the following cases we may get a list()
    #Currently fudge handling these in the populate phase.
    languages=character(),
    territories=character(),
    stringsAsFactors=FALSE)
  #Populate datasets dataframe
  jdr=jsondata$datasets
  for (i in 1:length(jdr)){
    datasets.df=rbind(datasets.df,
                      data.frame(
                        name=jdr[[i]]$name,
                        label=jdr[[i]]$label,
                        category=jdr[[i]]$category,
                        description=jdr[[i]]$description,
                        currency=jdr[[i]]$currency,
                        html_url=jdr[[i]]$html_url,
                        languages=paste(jdr[[i]]$languages,collapse='::'),
                        territories=paste(jdr[[i]]$territories,collapse='::'),
                        stringsAsFactors=FALSE
                      )
    )
  }
  datasets.df
}
#EXAMPLE: generate a dataframe listing the datasets identified in the
## datasets lookup JSON data.
gbdatasets.df=okf.os.getDatasetsFromJSON(gbdatasets.json)
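#SKETCH: simple lookups over the datasets dataframe, e.g. listing dataset names
##and labels, or filtering on currency; the 'GBP' value is just an illustrative guess:
#gbdatasets.df[,c('name','label')]
#subset(gbdatasets.df, currency=='GBP')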
#Get data aggregate
##/api/2/aggregate?dataset=ukgov-finances-cra&cut=time.year:2010
#Could we request this as CSV instead?
#No working demos yet - what should we expect to see in the response?
#What are some good lookups/simple settings to try?
okf.os.getAggregateJSON=function(dataset,year=NA,cut=NA,measure=NA,drilldown=NA,page=NA,pagesize=NA,order=NA){
  #Need to trap for is.na(dataset) - if it is NA, exit the function
  url=paste('http://openspending.org','/api/2/aggregate?dataset=',dataset,sep='')
  ##TESTING - only release when USAGE example demonstrated
  #Temporary hack on year... really need to build up the cut from higher level args, such as year
  ##That is, we shouldn't expect the user to have to remember things like time.year: ?
  #if (!is.na(year)) url=paste(url,'&cut=time.year:',year,sep='')
  if (!is.na(year[1])) url=paste(url,'&cut=',paste('time.year:',year,collapse="|",sep=''),sep='')
  #if (!is.na(cut)) url=paste(url,'&cut=',cut,sep='')
  #if (!is.na(page)) url=paste(url,'&page=',page,sep='')
  #if (!is.na(drilldown)) url=paste(url,'&drilldown=',drilldown,sep='')
  #if (!is.na(measure)) url=paste(url,'&measure=',measure,sep='')
  #if (!is.na(order)) url=paste(url,'&order=',order,sep='')
  #if (!is.na(pagesize)) url=paste(url,'&pagesize=',pagesize,sep='')
  ##print(url)
  rawdata=getURL(url)
  #We probably need to be able to handle paging for large numbers of results?
  fromJSON(rawdata)
}
#CANDIDATE DATASET FOR WORKED EXAMPLES: ukgov-25k-spending
#okf.os.getAggregateJSON('ukgov-25k-spending')
gb.2011.json=okf.os.getAggregateJSON('ukgov-25k-spending','2011')
gb.2010_11.json=okf.os.getAggregateJSON('ukgov-25k-spending',c('2011','2010'))
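#SKETCH: once we know what the aggregate response looks like, something along
##these lines might pull out headline figures.
##ASSUMPTION: the aggregate JSON contains 'summary' and 'drilldown' elements
##(not yet checked against a live call).
#gb.2011.json$summary
#length(gb.2011.json$drilldown)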