Skip to content

Instantly share code, notes, and snippets.

@psychemedia

psychemedia/ons.py

Last active Mar 27, 2016
Embed
What would you like to do?
Making a start on ONS JSON pandas wrapper
import urllib.parse

import pandas as pd
import requests
from pandas import read_json

try:
    # Preferred top-level location of json_normalize in current pandas.
    from pandas import json_normalize
except ImportError:
    # Fall back to the legacy location used by older pandas releases.
    from pandas.io.json import json_normalize
class UKONSReader():
    """
    Download data series from the UK Office of National Statistics.

    The ``*_url`` methods only build URL strings and perform no network
    access. The ``search_*`` and ``get_*_feed`` methods fetch those URLs
    with ``requests`` and return the decoded result.
    """

    def __init__(self):
        """ Need to think more about the structure of this... """
        pass

    def _get_json(self, url):
        """Fetch ``url`` and return the response body decoded as JSON."""
        return requests.get(url).json()

    def atoz_url(self, letter=None):
        """
        Get the URL for an A to Z listing of available stats, returning result as JSON data feed

        Parameters
        ----------
        letter: character
            Optionally return just the listing for items starting with a particular letter

        Returns
        -------
        string
            The listing URL, with an ``az`` query argument when ``letter`` is given.
        """
        url = 'http://www.ons.gov.uk/atoz/data'
        if letter is not None:
            # TODO: check letter is valid. For now it is only URL-encoded so
            # that an unexpected value cannot corrupt the query string.
            url = '{}?{}'.format(url, urllib.parse.urlencode({'az': letter}))
        return url

    def search_url(self, query='unemployment', stype='all', sfilter='', sort='relevance', page=1):
        """
        Get the URL for a search on available stats, returning result as JSON data feed

        Parameters
        ----------
        query: string
            search term
        stype: string
            ``all`` downloads all results; also: ``data`` and ``publications``
        sfilter: string
            ``timeseries`` or ``datasets`` or ``user_requested_data``, multiple items comma separated.
            Each item becomes its own ``filter`` argument; empty items are ignored.
        sort: string
            ``relevance`` or ``title`` or ``release_date``
        page: integer
            results page number

        Returns
        -------
        string
            The search URL with URL-encoded query arguments.
        """
        # Allow the user to specify 'publication' OR 'publications'.
        # Anything other than data/publication searches everything.
        stem = stype.rstrip('s')
        spref = stem if stem in ('data', 'publication') else ''

        params = [('q', query), ('page', page), ('sortBy', sort)]
        if sfilter:
            # Separate out filter items, skipping blanks so that an empty
            # sfilter does not add a dangling "&filter=" argument.
            for item in sfilter.split(','):
                item = item.strip()
                if item:
                    params.append(('filter', item))

        return 'http://www.ons.gov.uk/search{spref}/data?{qs}'.format(
            spref=spref, qs=urllib.parse.urlencode(params))

    def search_one(self, query='unemployment', stype='all', sfilter='', sort='relevance', page=1):
        """
        Search available data series from the UK Office of National Statistics, returning one page

        Parameters
        ----------
        query: string
            search term
        stype: string
            ``all`` downloads all results; also: ``data`` and ``publications``
        sfilter: string
            ``timeseries`` or ``datasets`` or ``user_requested_data``, multiple items comma separated
            In the constructed URL, multiple filter terms are added as distinct filter terms.
            For example, ``&filter=time_series&filter=datasets``
        sort: string
            ``relevance`` or ``title`` or ``release_date``
        page: integer
            results page number

        Returns
        -------
        DataFrame
            The ``result.results`` list of the response, flattened with ``json_normalize``.
        """
        url = self.search_url(query, stype, sfilter, sort, page)
        response = self._get_json(url)
        return json_normalize(response['result']['results'])

    def search_all(self, query='unemployment', stype='all', sfilter='', sort='relevance'):
        """
        Search available data series from the UK Office of National Statistics, returning all pages

        Parameters
        ----------
        query: string
            search term
        stype: string
            ``all`` downloads all results; also: ``data`` and ``publications``
        sfilter: string
            ``timeseries`` or ``datasets`` or ``user_requested_data``, multiple items comma separated
            In the constructed URL, multiple filter terms are added as distinct filter terms.
            For example, ``&filter=time_series&filter=datasets``
        sort: string
            ``relevance`` or ``title`` or ``release_date``

        Returns
        -------
        DataFrame
            The results of every page concatenated in page order. Row labels
            are left as produced per page, so they repeat across pages.
        """
        # The first page is needed for the page count; keep its results too
        # rather than discarding them.
        first = self._get_json(self.search_url(query, stype, sfilter, sort, 1))
        result = first['result']
        frames = [json_normalize(result['results'])]

        last_page = result['paginator']['numberOfPages']
        for page in range(2, last_page + 1):
            frames.append(self.search_one(query=query, stype=stype, sfilter=sfilter,
                                          sort=sort, page=page))

        # Concatenate once at the end instead of growing a frame per page.
        return pd.concat(frames)

    def data_file_url(self, stub='/economy/inflationandpriceindices/timeseries/d7g7', ftype='csv'):
        """
        Get the URL for a data file of a specified type for a given statistic

        Parameters
        ----------
        stub: string
            Path to required statistic
        ftype: string
            Desired filetype; one of ``csv``, ``xls``

        Returns
        -------
        string
            The generator URL carrying ``format`` and ``uri`` query arguments.
        """
        # '/' is kept unescaped so the path stays readable in the URL.
        query = urllib.parse.urlencode([('format', ftype), ('uri', stub)], safe='/')
        return 'http://www.ons.gov.uk/generator?{}'.format(query)

    def data_json_url(self, stub='/economy/inflationandpriceindices/timeseries/d7g7'):
        """
        Get the URL for a JSON data feed for a given statistic

        Parameters
        ----------
        stub: string
            Path to required statistic; leading and trailing slashes are ignored

        Returns
        -------
        string
            The statistic's path followed by ``/data``.
        """
        return 'http://www.ons.gov.uk/{}/data'.format(stub.strip('/'))

    # Are timeseries all represented the same way? If so, create a timeseries handler
    def get_data_feed(self, stub='/economy/inflationandpriceindices/timeseries/d7g7'):
        """
        Fetch the JSON data feed for a given statistic

        Parameters
        ----------
        stub: string
            Path to required statistic

        Returns
        -------
        object
            The decoded JSON document, unmodified.
        """
        return self._get_json(self.data_json_url(stub))

    # guess at the following without tests
    def get_timeseries_data_feed(self, stub='/economy/inflationandpriceindices/timeseries/d7g7',
                                 period='months'):
        """
        Fetch the JSON data feed for a timeseries dataset as a DataFrame

        Parameters
        ----------
        stub: string
            Path to required statistic; must contain ``timeseries``
        period: string
            Top-level key of the feed that holds the observations. Defaults to
            ``months``. NOTE(review): which other keys a feed offers has not
            been checked - confirm against a live response.

        Returns
        -------
        DataFrame or None
            The ``period`` entry of the feed flattened with ``json_normalize``,
            or ``None`` when ``stub`` does not look like a timeseries path.
        """
        # Need to better handle requests not apparently for timeseries
        if 'timeseries' not in stub:
            return None
        feed = self._get_json(self.data_json_url(stub))
        return json_normalize(feed[period])

    def get_chart_url(self, stub='/economy/inflationandpriceindices/timeseries/d7g7',
                      series=None, fromMonth=None, fromYear=None, toMonth=None, toYear=None,
                      frequency=None):
        """
        Get the URL for time series chart image file

        Parameters
        ----------
        stub: string
            Path to required statistic
        series: string?
            NOT SURE WHAT THIS DOES?
        fromMonth: string
            Month number as two digit string e.g. 01, 12
        fromYear: integer
            Four digit integer e.g. 1971
        toMonth: string
            Month number as two digit string e.g. 01, 12
        toYear: integer
            Four digit integer e.g. 1971
        frequency: string
            ``months``

        Returns
        -------
        string
            The chart image URL. Arguments left as ``None`` are omitted; the
            trailing ``?`` is present even when no arguments are supplied.
        """
        base = 'https://www.ons.gov.uk/{stub}/linechartimage?'.format(stub=stub.strip('/'))
        # A list of pairs keeps the argument order fixed on every Python version.
        candidates = [('series', series),
                      ('fromMonth', fromMonth),
                      ('toMonth', toMonth),
                      ('fromYear', fromYear),
                      ('toYear', toYear),
                      ('frequency', frequency)]
        supplied = [(key, value) for key, value in candidates if value is not None]
        return base + urllib.parse.urlencode(supplied)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.