Skip to content

Instantly share code, notes, and snippets.

@paddycarey
Created September 19, 2012 22:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save paddycarey/3752817 to your computer and use it in GitHub Desktop.
Save paddycarey/3752817 to your computer and use it in GitHub Desktop.
Simple Python wrapper for the Northern Ireland Assembly Open Data API
"""
Copyright (c) 2012, Patrick Carey
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""
"""
The data and information available through data.niassembly.gov.uk are
available under terms described in the Open Northern Ireland Assembly Licence.
You are free to:
Copy, publish, distribute and transmit the Information
Adapt the Information
Exploit the Information commercially
Please see http://data.niassembly.gov.uk/license.aspx for further details
"""
# stdlib imports
import json
# Third party imports
import requests
import unidecode
import xmltodict
# Lookup table of supported API methods.
#
# Each key is the public method name; each value is a 2-tuple of
#   (URL path template, response format)
# The template always contains exactly two ``%s`` placeholders, which are
# filled with the two (possibly empty) arguments given to ``Scraper``.
# The response format is one of 'json', 'xml' or 'html' and selects the
# parser used for the response body.
api_methods = {

    # ---- Member methods ----
    'GetAllCurrentCommitteeChairs': (
        '/members.asmx/GetAllCurrentCommitteeChairs?%s%s',
        'xml',
    ),
    'GetAllCurrentMembers': (
        '/members.asmx/GetAllCurrentMembers_JSON?%s%s',
        'json',
    ),
    'GetAllCurrentMembersByGivenConstituencyId': (
        '/members.asmx/GetAllCurrentMembersByGivenConstituencyId_JSON?constituencyId=%s%s',
        'json',
    ),
    'GetAllCurrentMembersByGivenPartyId': (
        '/members.asmx/GetAllCurrentMembersByGivenPartyId_JSON?partyId=%s%s',
        'json',
    ),
    'GetAllCurrentMembersBySurnameSearch': (
        '/members.asmx/GetAllCurrentMembersBySurnameSearch_JSON?searchText=%s%s',
        'json',
    ),
    'GetAllCurrentMinisters': (
        '/members.asmx/GetAllCurrentMinisters?%s%s',
        'xml',
    ),
    'GetAllMembersByGivenDate': (
        '/members.asmx/GetAllMembersByGivenDate_JSON?specificDate=%s%s',
        'json',
    ),
    'GetMemberRolesByPersonId': (
        '/members.asmx/GetMemberRolesByPersonId_JSON?personId=%s%s',
        'json',
    ),

    # ---- Question methods ----
    'GetQuestionDetails': (
        '/questions.asmx/GetQuestionDetails_JSON?documentId=%s%s',
        'json',
    ),
    'GetQuestionsByMember': (
        '/questions.asmx/GetQuestionsByMember_JSON?personId=%s%s',
        'json',
    ),
    'GetQuestionsBySearchText': (
        '/questions.asmx/GetQuestionsBySearchText_JSON?searchText=%s%s',
        'json',
    ),
    'GetQuestionsForOralAnswer_AnsweredInRange': (
        '/questions.asmx/GetQuestionsForOralAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s',
        'json',
    ),
    'GetQuestionsForOralAnswer_TabledInRange': (
        '/questions.asmx/GetQuestionsForOralAnswer_TabledInRange_JSON?startDate=%s&endDate=%s',
        'json',
    ),
    'GetQuestionsForWrittenAnswer_AnsweredInRange': (
        '/questions.asmx/GetQuestionsForWrittenAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s',
        'json',
    ),
    'GetQuestionsForWrittenAnswer_TabledInRange': (
        '/questions.asmx/GetQuestionsForWrittenAnswer_TabledInRange_JSON?startDate=%s&endDate=%s',
        'json',
    ),
    'GetWrittenAnswerHtml': (
        '/questions.asmx/GetWrittenAnswerHtml?documentId=%s%s',
        'html',
    ),

    # ---- Organisation methods ----
    'GetAllPartyGroupsListCurrent': (
        '/organisations.asmx/GetAllPartyGroupsListCurrent_JSON?%s%s',
        'json',
    ),
    'GetCommitteesListCurrent_AdHoc': (
        '/organisations.asmx/GetCommitteesListCurrent_AdHoc_JSON?%s%s',
        'json',
    ),
    'GetCommitteesListCurrent_Standing': (
        '/organisations.asmx/GetCommitteesListCurrent_Standing_JSON?%s%s',
        'json',
    ),
    'GetCommitteesListCurrent_Statutory': (
        '/organisations.asmx/GetCommitteesListCurrent_Statutory_JSON?%s%s',
        'json',
    ),
    'GetDepartmentListCurrent': (
        '/organisations.asmx/GetDepartmentListCurrent_JSON?%s%s',
        'json',
    ),
    'GetPartiesListCurrent': (
        '/organisations.asmx/GetPartiesListCurrent_JSON?%s%s',
        'json',
    ),
}
class Scraper(object):
    """
    Simple wrapper for the data.niassembly.gov.uk open data APIs.

    Instantiating the class performs the HTTP request straight away and
    stores the parsed response on ``self.data``.

    Parameters:
        api_method: name of the API method to call; must be a key of the
            module-level ``api_methods`` table (``KeyError`` otherwise).
        api_arg1: first value substituted into the method's URL template
            (converted with ``str``); defaults to the empty string.
        api_arg2: second value substituted into the method's URL template
            (converted with ``str``); defaults to the empty string.

    Attributes:
        data: the parsed response -- decoded JSON for 'json' methods, a
            plain dict for 'xml' methods, raw text for 'html' methods.
    """

    # Base URL for all API calls
    base_url = 'http://data.niassembly.gov.uk'

    def __init__(self, api_method, api_arg1='', api_arg2=''):
        # make params available to our class methods
        self.api_method = api_method
        self.api_arg1 = str(api_arg1)
        self.api_arg2 = str(api_arg2)

        # Dict mapping api types to parsing functions
        api_type = {
            'html': self.getdata_html,
            'json': self.getdata_json,
            'xml': self.getdata_xml,
        }

        # Look up the (url template, response type) pair and make the call
        self.api_call = api_methods[api_method]
        self.data = api_type[self.api_call[1]]()

    @staticmethod
    def _strip_jsonp(text):
        """
        Return ``text`` with the JSONP wrapper ``?( ... );`` removed.

        Only a leading ``?(`` and a trailing ``);`` are stripped.  The
        payload in between is left untouched, so string values that happen
        to contain ``?(`` or ``);`` are no longer corrupted (a blanket
        ``str.replace`` would delete them).
        """
        prefix, suffix = '?(', ');'
        text = text.strip()
        if text.startswith(prefix):
            text = text[len(prefix):]
        if text.endswith(suffix):
            text = text[:-len(suffix)]
        return text

    def getdata_raw(self):
        """Perform the GET request and return the ``requests`` response."""
        # fill both placeholders of the URL template with our arguments
        path = self.api_call[0] % (self.api_arg1, self.api_arg2)
        return requests.get(self.base_url + path)

    def getdata_html(self):
        """Return the response body as text, without further parsing."""
        # TODO: Do some simple html parsing
        return self.getdata_raw().text

    def getdata_json(self):
        """Return the response body decoded from JSON."""
        # the API encapsulates the json response in some
        # extraneous text (JSONP) so lets remove it
        response_text = self._strip_jsonp(self.getdata_raw().text)
        return json.loads(response_text)

    def getdata_xml(self):
        """Return the XML response body converted to a plain dict."""
        # xmltodict is picky about unicode, so let's get rid of it
        response_text = unidecode.unidecode(self.getdata_raw().text)

        # parse the xml into an ordered dict
        xml_dict = xmltodict.parse(response_text)

        # we don't really want an ordered dict so convert to a standard
        # dict by dumping to and reparsing from JSON (yes I realise this
        # seems odd but is the simplest way to deal with arbitrary levels
        # of nesting in an ordered dict)
        return json.loads(json.dumps(xml_dict))
if __name__ == '__main__':
    # Simple example usage: fetch one member's roles, then the list of all
    # current members, pretty-printing each parsed response as JSON.
    # print is called with a single parenthesised argument so the same
    # lines work under both Python 2 and Python 3.
    scraper = Scraper('GetMemberRolesByPersonId', 108)
    print(json.dumps(scraper.data, indent=2))

    scraper = Scraper('GetAllCurrentMembers')
    print(json.dumps(scraper.data, indent=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment