Created
September 19, 2012 22:44
-
-
Save paddycarey/3752817 to your computer and use it in GitHub Desktop.
Simple python wrapper for Northern Ireland Assembly Open Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Copyright (c) 2012, Patrick Carey | |
Permission to use, copy, modify, and/or distribute this software for any | |
purpose with or without fee is hereby granted, provided that the above | |
copyright notice and this permission notice appear in all copies. | |
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR | |
IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
""" | |
""" | |
The data and information available through data.niassembly.gov.uk are | |
available under terms described in the Open Northern Ireland Assembly Licence. | |
You are free to: | |
Copy, publish, distribute and transmit the Information | |
Adapt the Information | |
Exploit the Information commercially | |
Please see http://data.niassembly.gov.uk/license.aspx for further details | |
""" | |
# stdlib imports | |
import json | |
# Third party imports | |
import requests | |
import unidecode | |
import xmltodict | |
# Registry of supported API methods.
#
# Each key is the public method name passed to ``Scraper``. Each value is a
# 2-tuple ``(path_template, response_format)``:
#   * ``path_template`` is appended to ``Scraper.base_url`` and always holds
#     exactly two ``%s`` placeholders, filled with ``api_arg1`` and
#     ``api_arg2`` (both default to the empty string, so methods that take
#     fewer arguments simply receive '' for the unused slots).
#   * ``response_format`` ('json', 'xml' or 'html') selects which
#     ``Scraper.getdata_*`` parser handles the response.
api_methods = {
    # Member methods
    'GetAllCurrentCommitteeChairs': ('/members.asmx/GetAllCurrentCommitteeChairs?%s%s', 'xml'),
    'GetAllCurrentMembers': ('/members.asmx/GetAllCurrentMembers_JSON?%s%s', 'json'),
    'GetAllCurrentMembersByGivenConstituencyId': ('/members.asmx/GetAllCurrentMembersByGivenConstituencyId_JSON?constituencyId=%s%s', 'json'),
    'GetAllCurrentMembersByGivenPartyId': ('/members.asmx/GetAllCurrentMembersByGivenPartyId_JSON?partyId=%s%s', 'json'),
    'GetAllCurrentMembersBySurnameSearch': ('/members.asmx/GetAllCurrentMembersBySurnameSearch_JSON?searchText=%s%s', 'json'),
    'GetAllCurrentMinisters': ('/members.asmx/GetAllCurrentMinisters?%s%s', 'xml'),
    'GetAllMembersByGivenDate': ('/members.asmx/GetAllMembersByGivenDate_JSON?specificDate=%s%s', 'json'),
    'GetMemberRolesByPersonId': ('/members.asmx/GetMemberRolesByPersonId_JSON?personId=%s%s', 'json'),
    # Question methods
    'GetQuestionDetails': ('/questions.asmx/GetQuestionDetails_JSON?documentId=%s%s', 'json'),
    'GetQuestionsByMember': ('/questions.asmx/GetQuestionsByMember_JSON?personId=%s%s', 'json'),
    'GetQuestionsBySearchText': ('/questions.asmx/GetQuestionsBySearchText_JSON?searchText=%s%s', 'json'),
    'GetQuestionsForOralAnswer_AnsweredInRange': ('/questions.asmx/GetQuestionsForOralAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetQuestionsForOralAnswer_TabledInRange': ('/questions.asmx/GetQuestionsForOralAnswer_TabledInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetQuestionsForWrittenAnswer_AnsweredInRange': ('/questions.asmx/GetQuestionsForWrittenAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetQuestionsForWrittenAnswer_TabledInRange': ('/questions.asmx/GetQuestionsForWrittenAnswer_TabledInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetWrittenAnswerHtml': ('/questions.asmx/GetWrittenAnswerHtml?documentId=%s%s', 'html'),
    # Organisation methods
    'GetAllPartyGroupsListCurrent': ('/organisations.asmx/GetAllPartyGroupsListCurrent_JSON?%s%s', 'json'),
    'GetCommitteesListCurrent_AdHoc': ('/organisations.asmx/GetCommitteesListCurrent_AdHoc_JSON?%s%s', 'json'),
    'GetCommitteesListCurrent_Standing': ('/organisations.asmx/GetCommitteesListCurrent_Standing_JSON?%s%s', 'json'),
    'GetCommitteesListCurrent_Statutory': ('/organisations.asmx/GetCommitteesListCurrent_Statutory_JSON?%s%s', 'json'),
    'GetDepartmentListCurrent': ('/organisations.asmx/GetDepartmentListCurrent_JSON?%s%s', 'json'),
    'GetPartiesListCurrent': ('/organisations.asmx/GetPartiesListCurrent_JSON?%s%s', 'json'),
}
class Scraper(object):
    """
    Simple wrapper for the data.niassembly.gov.uk open data APIs.

    Creating an instance immediately performs the HTTP request for the
    requested API method and stores the parsed result on ``self.data``.

    Parameters:
        api_method: key into the module-level ``api_methods`` mapping.
        api_arg1, api_arg2: values substituted (after ``str()``) into the two
            ``%s`` slots of the method's path template; default to ''.

    Raises:
        KeyError: if ``api_method`` is not present in ``api_methods``.
    """

    # Base URL for all API calls
    base_url = 'http://data.niassembly.gov.uk'

    # Text the JSON endpoints wrap around their payload (JSONP with '?' as
    # the callback name): ``?(<json>);``
    _jsonp_prefix = '?('
    _jsonp_suffix = ');'

    def __init__(self, api_method, api_arg1='', api_arg2=''):
        # make params available to our class methods
        self.api_method = api_method
        self.api_arg1 = str(api_arg1)
        self.api_arg2 = str(api_arg2)

        # Dict mapping api types to parsing functions
        api_type = {
            'html': self.getdata_html,
            'json': self.getdata_json,
            'xml': self.getdata_xml,
        }

        # Look up the (path_template, response_format) pair, then fetch and
        # parse straight away using the parser matching the response format
        self.api_call = api_methods[api_method]
        self.data = api_type[self.api_call[1]]()

    def getdata_raw(self):
        """Perform the GET request and return the ``requests`` response."""
        # fill both placeholders of the path template, then prefix the host
        url = self.base_url + self.api_call[0] % (self.api_arg1, self.api_arg2)
        # NOTE: the HTTP status is not checked here; whatever body comes back
        # is handed to the parser as-is
        return requests.get(url)

    def getdata_html(self):
        """Return the response body as text, unparsed."""
        # TODO: Do some simple html parsing
        return self.getdata_raw().text

    @staticmethod
    def _strip_jsonp(text):
        """
        Return ``text`` with the surrounding ``?(`` ... ``);`` wrapper removed.

        Only the very start and very end of the body are touched, so the same
        character sequences occurring inside JSON string values (e.g. in the
        text of a question) are preserved. Text without a wrapper is returned
        unchanged apart from surrounding whitespace.
        """
        prefix = Scraper._jsonp_prefix
        suffix = Scraper._jsonp_suffix
        text = text.strip()
        if text.startswith(prefix):
            text = text[len(prefix):]
        if text.endswith(suffix):
            text = text[:-len(suffix)]
        return text

    def getdata_json(self):
        """Fetch the response, unwrap the JSONP envelope and parse the JSON."""
        # the API encapsulates the json response in some
        # extraneous text (JSONP) so lets remove it
        response_text = self._strip_jsonp(self.getdata_raw().text)
        return json.loads(response_text)

    def getdata_xml(self):
        """Fetch the response and return the XML converted to plain dicts."""
        # xmltodict is picky about unicode, so transliterate the body to
        # plain ASCII first
        response_text = unidecode.unidecode(self.getdata_raw().text)
        # parse the xml into an ordered dict
        xml_dict = xmltodict.parse(response_text)
        # we don't really want an ordered dict so convert to a standard
        # dict by dumping to and reparsing from JSON (yes I realise this
        # seems odd but is the simplest way to deal with arbitrary levels
        # of nesting in an ordered dict)
        return json.loads(json.dumps(xml_dict))
if __name__ == '__main__':
    # Simple example usage. Each Scraper(...) call performs a live HTTP
    # request against the API and stores the parsed result on `.data`.
    # print(...) with a single argument produces the same output on
    # Python 2 and Python 3, unlike the `print x` statement form.
    scraper = Scraper('GetMemberRolesByPersonId', 108)
    print(json.dumps(scraper.data, indent=2))
    scraper = Scraper('GetAllCurrentMembers')
    print(json.dumps(scraper.data, indent=2))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment