First hack at trying to write some R functions to wrap the OpenSpending API
require(RCurl)
require(rjson)

#Starting to hack some routines around the OpenSpending API
#to try to make it a little easier to play with the data using R

#Get datasets
#USAGE:
##datasets.json=okf.os.getDatasetsJSON()
##gbdatasets.json=okf.os.getDatasetsJSON(territories='gb')
#This function is not terribly helpful on its own -
#we need to pass the JSON on to another function for parsing...
okf.os.getDatasetsJSON=function(territories=NA,languages=NA){
  url='http://openspending.org/datasets.json?'
  #Filtering the dataset lookup by territory is allowed.
  #The filter seems to be case sensitive, so go defensive
  if (!is.na(territories)) url=paste(url,'territories=',toupper(territories),sep='')
  #As is filtering by language - should we force tolower here?
  if (!is.na(languages)) url=paste(url,'&languages=',languages,sep='')
  rawdata=getURL(url)
  #We probably need to be able to handle paging for large numbers of results?
  ##Which is to say - it may well be that not all datasets are returned at the moment...
  fromJSON(rawdata)
}
#EXAMPLE: get a list of the first DEFAULT(?) number of datasets
datasets.json=okf.os.getDatasetsJSON()
#EXAMPLE: get a list of datasets relating to the GB territory
gbdatasets.json=okf.os.getDatasetsJSON(territories='GB')
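#Quick sanity check on the raw JSON result - a minimal sketch, assuming the
#parsed list includes the 'datasets' and 'territories' elements that the
#helper functions below rely on.
names(datasets.json)
length(datasets.json$datasets)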
#Get something more useful out of the datasets JSON result
#For example, what territories are covered by the listed datasets?
#USAGE:
##territories.df=okf.os.getDatasetsTerritoriesFromJSON(datasets.json)
okf.os.getDatasetsTerritoriesFromJSON=function(jsondata){
  #Initialise a data frame
  territories.df <- data.frame(count=numeric(),
                               url=character(),
                               label=character(),
                               code=character(),
                               stringsAsFactors=FALSE)
  jtr=jsondata$territories
  for (i in 1:length(jtr)){
    territories.df=rbind(territories.df,
                         data.frame(
                           count=jtr[[i]]$count,
                           url=jtr[[i]]$url,
                           label=jtr[[i]]$label,
                           code=jtr[[i]]$code
                         )
    )
  }
  territories.df
}
#EXAMPLE: generate a dataframe identifying the territories covered by
##the datasets listed in the datasets JSON.
territories.df=okf.os.getDatasetsTerritoriesFromJSON(datasets.json)
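#SKETCH: order the territories by the number of datasets covering them,
#assuming the 'count' column populated above comes back from the API as a number.
territories.df[order(-territories.df$count),]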
#Generate a dataframe describing the datasets listed in a datasets JSON result
#USAGE:
##gbdatasets.df=okf.os.getDatasetsFromJSON(gbdatasets.json)
okf.os.getDatasetsFromJSON=function(jsondata){
  #Initialise the datasets dataframe
  datasets.df <- data.frame(
    name=character(),
    label=character(),
    category=character(),
    description=character(),
    currency=character(),
    html_url=character(),
    #In the following cases we may get a list()
    #Currently fudge handling these in the populate phase.
    languages=character(),
    territories=character(),
    stringsAsFactors=FALSE)
  #Populate the datasets dataframe
  jdr=jsondata$datasets
  for (i in 1:length(jdr)){
    datasets.df=rbind(datasets.df,
                      data.frame(
                        name=jdr[[i]]$name,
                        label=jdr[[i]]$label,
                        category=jdr[[i]]$category,
                        description=jdr[[i]]$description,
                        currency=jdr[[i]]$currency,
                        html_url=jdr[[i]]$html_url,
                        languages=paste(jdr[[i]]$languages,collapse='::'),
                        territories=paste(jdr[[i]]$territories,collapse='::')
                      )
    )
  }
  datasets.df
}
#EXAMPLE: generate a dataframe listing the datasets identified in the
##datasets lookup JSON data.
gbdatasets.df=okf.os.getDatasetsFromJSON(gbdatasets.json)
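#SKETCH: simple summaries over the datasets dataframe - for instance, how many
#of the GB datasets are recorded against each currency or category.
#(Assumes the columns populated above behave as plain character/factor values.)
table(gbdatasets.df$currency)
table(gbdatasets.df$category)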
#Get a data aggregate
##/api/2/aggregate?dataset=ukgov-finances-cra&cut=time.year:2010
#Could we request this as CSV instead?
#No working demos yet - what should we expect to see?
#What are some good lookups/simple settings to try?
okf.os.getAggregateJSON=function(dataset,year=NA,cut=NA,measure=NA,drilldown=NA,page=NA,pagesize=NA,order=NA){
  #Trap for a missing/NA dataset - if there isn't one, exit the function
  if (missing(dataset) || is.na(dataset)) stop('A dataset identifier is required')
  url=paste('http://openspending.org','/api/2/aggregate?dataset=',dataset,sep='')
  ##TESTING - only release when a USAGE example has been demonstrated
  #Temporary hack on year... really need to build up cut from higher level args, such as year
  ##That is, we shouldn't expect the user to have to remember things like time.year: ?
  #if (!is.na(year)) url=paste(url,'&cut=time.year:',year,sep='')
  if (!is.na(year[1])) url=paste(url,'&cut=',paste('time.year:',year,collapse="|",sep=''),sep='')
  #if (!is.na(cut)) url=paste(url,'&cut=',cut,sep='')
  #if (!is.na(page)) url=paste(url,'&page=',page,sep='')
  #if (!is.na(drilldown)) url=paste(url,'&drilldown=',drilldown,sep='')
  #if (!is.na(measure)) url=paste(url,'&measure=',measure,sep='')
  #if (!is.na(order)) url=paste(url,'&order=',order,sep='')
  #if (!is.na(pagesize)) url=paste(url,'&pagesize=',pagesize,sep='')
  ##print(url)
  rawdata=getURL(url)
  #We probably need to be able to handle paging for large numbers of results?
  fromJSON(rawdata)
}
#CANDIDATE DATASET FOR WORKED EXAMPLES: ukgov-25k-spending
#okf.os.getAggregateJSON('ukgov-25k-spending')
gb.2011.json=okf.os.getAggregateJSON('ukgov-25k-spending','2011')
gb.2010_11.json=okf.os.getAggregateJSON('ukgov-25k-spending',c('2011','2010'))
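#SKETCH (untested, hypothetical helper): pull the amounts out of an aggregate
#result. This assumes the aggregate JSON includes a 'drilldown' list whose
#elements each carry an 'amount' field - check a real response before relying
#on this, since no working demo of the aggregate call has been confirmed yet.
okf.os.getAggregateAmounts=function(aggjson){
  sapply(aggjson$drilldown, function(entry) entry$amount)
}
#EXAMPLE (untested): okf.os.getAggregateAmounts(gb.2010_11.json)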