First hack at trying to write some R functions to wrap the OpenSpending API
require(RCurl)
require(rjson)

#Starting to hack some routines around the OpenSpending API
#to try to make it a little easier to play with the data using R

#Get datasets
#USAGE:
##datasets.json=okf.os.getDatasetsJSON()
##gbdatasets.json=okf.os.getDatasetsJSON(territories='gb')
#This function is not terribly helpful on its own -
#we need to pass the JSON on to another function for parsing...
okf.os.getDatasetsJSON=function(territories=NA,languages=NA){
  url='http://openspending.org/datasets.json?'
  #Filtering the dataset lookup by territory is allowed.
  #The filter seems to be case sensitive, so go defensive
  if (!is.na(territories)) url=paste(url,'territories=',toupper(territories),sep='')
  #As is filtering by language - should we force tolower here?
  if (!is.na(languages)) url=paste(url,'&languages=',languages,sep='')
  rawdata=getURL(url)
  #We probably need to be able to handle paging for large numbers of results?
  ##Which is to say - it may well be that not all datasets are returned at the moment...
  fromJSON(rawdata)
}
#EXAMPLE: get a list of the first DEFAULT(?) number of datasets
datasets.json=okf.os.getDatasetsJSON()
#EXAMPLE: get a list of datasets relating to the GB territory
gbdatasets.json=okf.os.getDatasetsJSON(territories='GB')
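#Quick sanity check on the raw JSON result - a minimal sketch, assuming the
#parsed list includes the 'datasets' and 'territories' elements that the
#helper functions below rely on.
names(datasets.json)
length(datasets.json$datasets)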
#Get something more useful out of the datasets JSON result
#For example, what territories are covered by the listed datasets?
#USAGE:
##territories.df=okf.os.getDatasetsTerritoriesFromJSON(datasets.json)
okf.os.getDatasetsTerritoriesFromJSON=function(jsondata){
  #Initialise a data frame
  territories.df <- data.frame(count=numeric(),
                               url=character(),
                               label=character(),
                               code=character(),
                               stringsAsFactors=FALSE)
  jtr=jsondata$territories
  for (i in 1:length(jtr)){
    territories.df=rbind(territories.df,
                         data.frame(
                           count=jtr[[i]]$count,
                           url=jtr[[i]]$url,
                           label=jtr[[i]]$label,
                           code=jtr[[i]]$code
                         )
    )
  }
  territories.df
}
#EXAMPLE: generate a dataframe identifying the territories covered by
##the datasets listed in the datasets JSON.
territories.df=okf.os.getDatasetsTerritoriesFromJSON(datasets.json)
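#SKETCH: order the territories by the number of datasets covering them,
#assuming the 'count' column populated above comes back from the API as a number.
territories.df[order(-territories.df$count),]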
#Generate a dataframe describing the datasets listed in a datasets JSON result
#USAGE:
##gbdatasets.df=okf.os.getDatasetsFromJSON(gbdatasets.json)
okf.os.getDatasetsFromJSON=function(jsondata){
  #Initialise the datasets dataframe
  datasets.df <- data.frame(
    name=character(),
    label=character(),
    category=character(),
    description=character(),
    currency=character(),
    html_url=character(),
    #In the following cases we may get a list()
    #Currently fudge handling these in the populate phase.
    languages=character(),
    territories=character(),
    stringsAsFactors=FALSE)
  #Populate the datasets dataframe
  jdr=jsondata$datasets
  for (i in 1:length(jdr)){
    datasets.df=rbind(datasets.df,
                      data.frame(
                        name=jdr[[i]]$name,
                        label=jdr[[i]]$label,
                        category=jdr[[i]]$category,
                        description=jdr[[i]]$description,
                        currency=jdr[[i]]$currency,
                        html_url=jdr[[i]]$html_url,
                        languages=paste(jdr[[i]]$languages,collapse='::'),
                        territories=paste(jdr[[i]]$territories,collapse='::')
                      )
    )
  }
  datasets.df
}
#EXAMPLE: generate a dataframe listing the datasets identified in the
##datasets lookup JSON data.
gbdatasets.df=okf.os.getDatasetsFromJSON(gbdatasets.json)
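#SKETCH: simple summaries over the datasets dataframe - for instance, how many
#of the GB datasets are recorded against each currency or category.
#(Assumes the columns populated above behave as plain character/factor values.)
table(gbdatasets.df$currency)
table(gbdatasets.df$category)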
#Get a data aggregate
##/api/2/aggregate?dataset=ukgov-finances-cra&cut=time.year:2010
#Could we request this as CSV instead?
#No working demos yet - what should we expect to see?
#What are some good lookups/simple settings to try?
okf.os.getAggregateJSON=function(dataset,year=NA,cut=NA,measure=NA,drilldown=NA,page=NA,pagesize=NA,order=NA){
  #Trap for a missing/NA dataset - if there isn't one, exit the function
  if (missing(dataset) || is.na(dataset)) stop('A dataset identifier is required')
  url=paste('http://openspending.org','/api/2/aggregate?dataset=',dataset,sep='')
  ##TESTING - only release when a USAGE example has been demonstrated
  #Temporary hack on year... really need to build up cut from higher level args, such as year
  ##That is, we shouldn't expect the user to have to remember things like time.year: ?
  #if (!is.na(year)) url=paste(url,'&cut=time.year:',year,sep='')
  if (!is.na(year[1])) url=paste(url,'&cut=',paste('time.year:',year,collapse="|",sep=''),sep='')
  #if (!is.na(cut)) url=paste(url,'&cut=',cut,sep='')
  #if (!is.na(page)) url=paste(url,'&page=',page,sep='')
  #if (!is.na(drilldown)) url=paste(url,'&drilldown=',drilldown,sep='')
  #if (!is.na(measure)) url=paste(url,'&measure=',measure,sep='')
  #if (!is.na(order)) url=paste(url,'&order=',order,sep='')
  #if (!is.na(pagesize)) url=paste(url,'&pagesize=',pagesize,sep='')
  ##print(url)
  rawdata=getURL(url)
  #We probably need to be able to handle paging for large numbers of results?
  fromJSON(rawdata)
}
#CANDIDATE DATASET FOR WORKED EXAMPLES: ukgov-25k-spending
#okf.os.getAggregateJSON('ukgov-25k-spending')
gb.2011.json=okf.os.getAggregateJSON('ukgov-25k-spending','2011')
gb.2010_11.json=okf.os.getAggregateJSON('ukgov-25k-spending',c('2011','2010'))
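#SKETCH (untested, hypothetical helper): pull the amounts out of an aggregate
#result. This assumes the aggregate JSON includes a 'drilldown' list whose
#elements each carry an 'amount' field - check a real response before relying
#on this, since no working demo of the aggregate call has been confirmed yet.
okf.os.getAggregateAmounts=function(aggjson){
  sapply(aggjson$drilldown, function(entry) entry$amount)
}
#EXAMPLE (untested): okf.os.getAggregateAmounts(gb.2010_11.json)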