# paddycarey/niaopendata.py

## niaopendata.py
"""
    Copyright (c) 2012, Patrick Carey

    Permission to use, copy, modify, and/or distribute this software for any
    purpose with or without fee is hereby granted, provided that the above
    copyright notice and this permission notice appear in all copies.

    THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
    IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
"""

"""
    The data and information available through data.niassembly.gov.uk are
    available under terms described in the Open Northern Ireland Assembly Licence.

    You are free to:
        Copy, publish, distribute and transmit the Information
        Adapt the Information
        Exploit the Information commercially

    Please see http://data.niassembly.gov.uk/license.aspx for further details
"""

# stdlib imports
import json

# Third party imports
import requests
import unidecode
import xmltodict


# Lookup table of the API methods supported by Scraper.
#
# Each key is the method name passed to Scraper(...); each value is a
# 2-tuple of (url_template, response_type):
#
#   url_template  -- path and query string appended to Scraper.base_url.
#                    It is %-formatted with (api_arg1, api_arg2), so every
#                    template carries exactly two '%s' placeholders.
#                    Endpoints that need fewer than two parameters keep the
#                    spare placeholders at the end, where the empty-string
#                    defaults of api_arg1/api_arg2 make them disappear.
#   response_type -- 'json', 'xml' or 'html'; selects which
#                    Scraper.getdata_* method parses the response.
api_methods = {

    # Member methods
    'GetAllCurrentCommitteeChairs': ('/members.asmx/GetAllCurrentCommitteeChairs?%s%s', 'xml'),
    'GetAllCurrentMembers': ('/members.asmx/GetAllCurrentMembers_JSON?%s%s', 'json'),
    'GetAllCurrentMembersByGivenConstituencyId': ('/members.asmx/GetAllCurrentMembersByGivenConstituencyId_JSON?constituencyId=%s%s', 'json'),
    'GetAllCurrentMembersByGivenPartyId': ('/members.asmx/GetAllCurrentMembersByGivenPartyId_JSON?partyId=%s%s', 'json'),
    'GetAllCurrentMembersBySurnameSearch': ('/members.asmx/GetAllCurrentMembersBySurnameSearch_JSON?searchText=%s%s', 'json'),
    'GetAllCurrentMinisters': ('/members.asmx/GetAllCurrentMinisters?%s%s', 'xml'),
    'GetAllMembersByGivenDate': ('/members.asmx/GetAllMembersByGivenDate_JSON?specificDate=%s%s', 'json'),
    'GetMemberRolesByPersonId': ('/members.asmx/GetMemberRolesByPersonId_JSON?personId=%s%s', 'json'),

    # Question methods
    'GetQuestionDetails': ('/questions.asmx/GetQuestionDetails_JSON?documentId=%s%s', 'json'),
    'GetQuestionsByMember': ('/questions.asmx/GetQuestionsByMember_JSON?personId=%s%s', 'json'),
    'GetQuestionsBySearchText': ('/questions.asmx/GetQuestionsBySearchText_JSON?searchText=%s%s', 'json'),
    # Date-range endpoints use both arguments: api_arg1 is the start date,
    # api_arg2 the end date.
    'GetQuestionsForOralAnswer_AnsweredInRange': ('/questions.asmx/GetQuestionsForOralAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetQuestionsForOralAnswer_TabledInRange': ('/questions.asmx/GetQuestionsForOralAnswer_TabledInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetQuestionsForWrittenAnswer_AnsweredInRange': ('/questions.asmx/GetQuestionsForWrittenAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetQuestionsForWrittenAnswer_TabledInRange': ('/questions.asmx/GetQuestionsForWrittenAnswer_TabledInRange_JSON?startDate=%s&endDate=%s', 'json'),
    'GetWrittenAnswerHtml': ('/questions.asmx/GetWrittenAnswerHtml?documentId=%s%s', 'html'),

    # Organisation methods
    'GetAllPartyGroupsListCurrent': ('/organisations.asmx/GetAllPartyGroupsListCurrent_JSON?%s%s', 'json'),
    'GetCommitteesListCurrent_AdHoc': ('/organisations.asmx/GetCommitteesListCurrent_AdHoc_JSON?%s%s', 'json'),
    'GetCommitteesListCurrent_Standing': ('/organisations.asmx/GetCommitteesListCurrent_Standing_JSON?%s%s', 'json'),
    'GetCommitteesListCurrent_Statutory': ('/organisations.asmx/GetCommitteesListCurrent_Statutory_JSON?%s%s', 'json'),
    'GetDepartmentListCurrent': ('/organisations.asmx/GetDepartmentListCurrent_JSON?%s%s', 'json'),
    'GetPartiesListCurrent': ('/organisations.asmx/GetPartiesListCurrent_JSON?%s%s', 'json'),

}


class Scraper(object):

    """
    Simple wrapper for the data.niassembly.gov.uk open data APIs.

    Creating an instance performs the HTTP request straight away; the
    parsed response is then available as ``self.data``.
    """

    # Base URL for all API calls
    base_url = 'http://data.niassembly.gov.uk'

    def __init__(self, api_method, api_arg1='', api_arg2=''):
        """
        Call ``api_method`` and store the parsed response in ``self.data``.

        api_method -- key of the module-level ``api_methods`` table; an
                      unknown name raises KeyError.
        api_arg1   -- first value substituted into the endpoint's URL
                      template (converted with ``str``).
        api_arg2   -- second value substituted into the URL template
                      (converted with ``str``).
        """

        # make params available to our class methods
        self.api_method = api_method
        self.api_arg1 = str(api_arg1)
        self.api_arg2 = str(api_arg2)

        # Dict mapping api types to parsing functions
        api_type = {
            'html': self.getdata_html,
            'json': self.getdata_json,
            'xml': self.getdata_xml,
        }

        # Look up the (url_template, response_type) pair, then run the
        # parser that matches the response type.
        self.api_call = api_methods[api_method]
        self.data = api_type[self.api_call[1]]()

    def getdata_raw(self):
        """
        Perform the GET request and return the ``requests`` response object.

        The URL is ``base_url`` followed by the endpoint template with both
        stored arguments substituted in.
        """
        return requests.get(self.base_url + self.api_call[0] % (self.api_arg1, self.api_arg2))

    def getdata_html(self):
        """Return the response body as text, unparsed."""
        # TODO: Do some simple html parsing
        return self.getdata_raw().text

    def getdata_json(self):
        """
        Return the response body decoded from JSON.

        The API wraps its JSON in JSONP-style text (``?(`` ... ``);``),
        which is removed before decoding. Raises ValueError (from
        ``json.loads``) if what remains is not valid JSON.
        """
        response_text = self._strip_jsonp(self.getdata_raw().text)
        return json.loads(response_text)

    @staticmethod
    def _strip_jsonp(text):
        """
        Remove a leading ``?(`` and a trailing ``);`` from ``text``.

        Only the outermost wrapper is removed. A blanket ``str.replace``
        would also delete these character sequences when they occur inside
        JSON string values (e.g. question text such as ``"...(why?); ..."``)
        and silently corrupt the data.
        """
        body = text.strip()
        if body.startswith('?('):
            body = body[len('?('):]
        if body.endswith(');'):
            body = body[:-len(');')]
        return body

    def getdata_xml(self):
        """
        Return the XML response body converted to plain nested dicts/lists.
        """
        # transliterate to ASCII first; the original author notes that
        # xmltodict is picky about unicode
        response_text = unidecode.unidecode(self.getdata_raw().text)
        # parse the xml into an ordered dict
        xml_dict = xmltodict.parse(response_text)
        # we don't really want an ordered dict, so convert to standard
        # dicts by dumping to and reparsing from JSON (odd-looking, but the
        # simplest way to deal with arbitrary levels of nesting)
        return json.loads(json.dumps(xml_dict))


if __name__ == '__main__':

    # Simple example usage: each Scraper(...) call hits the live API and the
    # parsed result is pretty-printed as JSON.
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    scraper = Scraper('GetMemberRolesByPersonId', 108)
    print(json.dumps(scraper.data, indent=2))

    scraper = Scraper('GetAllCurrentMembers')
    print(json.dumps(scraper.data, indent=2))
	"""
	Copyright (c) 2012, Patrick Carey

	Permission to use, copy, modify, and/or distribute this software for any
	purpose with or without fee is hereby granted, provided that the above
	copyright notice and this permission notice appear in all copies.

	THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
	WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
	MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
	ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
	WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
	ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR
	IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
	"""

	"""
	The data and information available through data.niassembly.gov.uk are
	available under terms described in the Open Northern Ireland Assembly Licence.

	You are free to:
	Copy, publish, distribute and transmit the Information
	Adapt the Information
	Exploit the Information commercially

	Please see http://data.niassembly.gov.uk/license.aspx for further details
	"""

	# stdlib imports
	import json

	# Third party imports
	import requests
	import unidecode
	import xmltodict


	# Lookup table of the API methods supported by Scraper.
	#
	# Each key is the method name passed to Scraper(...); each value is a
	# 2-tuple of (url_template, response_type):
	#
	#   url_template  -- path and query string appended to Scraper.base_url.
	#                    It is %-formatted with (api_arg1, api_arg2), so every
	#                    template carries exactly two '%s' placeholders.
	#                    Endpoints that need fewer than two parameters keep the
	#                    spare placeholders at the end, where the empty-string
	#                    defaults of api_arg1/api_arg2 make them disappear.
	#   response_type -- 'json', 'xml' or 'html'; selects which
	#                    Scraper.getdata_* method parses the response.
	api_methods = {

	# Member methods
	'GetAllCurrentCommitteeChairs': ('/members.asmx/GetAllCurrentCommitteeChairs?%s%s', 'xml'),
	'GetAllCurrentMembers': ('/members.asmx/GetAllCurrentMembers_JSON?%s%s', 'json'),
	'GetAllCurrentMembersByGivenConstituencyId': ('/members.asmx/GetAllCurrentMembersByGivenConstituencyId_JSON?constituencyId=%s%s', 'json'),
	'GetAllCurrentMembersByGivenPartyId': ('/members.asmx/GetAllCurrentMembersByGivenPartyId_JSON?partyId=%s%s', 'json'),
	'GetAllCurrentMembersBySurnameSearch': ('/members.asmx/GetAllCurrentMembersBySurnameSearch_JSON?searchText=%s%s', 'json'),
	'GetAllCurrentMinisters': ('/members.asmx/GetAllCurrentMinisters?%s%s', 'xml'),
	'GetAllMembersByGivenDate': ('/members.asmx/GetAllMembersByGivenDate_JSON?specificDate=%s%s', 'json'),
	'GetMemberRolesByPersonId': ('/members.asmx/GetMemberRolesByPersonId_JSON?personId=%s%s', 'json'),

	# Question methods
	'GetQuestionDetails': ('/questions.asmx/GetQuestionDetails_JSON?documentId=%s%s', 'json'),
	'GetQuestionsByMember': ('/questions.asmx/GetQuestionsByMember_JSON?personId=%s%s', 'json'),
	'GetQuestionsBySearchText': ('/questions.asmx/GetQuestionsBySearchText_JSON?searchText=%s%s', 'json'),
	# Date-range endpoints use both arguments: api_arg1 is the start date,
	# api_arg2 the end date.
	'GetQuestionsForOralAnswer_AnsweredInRange': ('/questions.asmx/GetQuestionsForOralAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s', 'json'),
	'GetQuestionsForOralAnswer_TabledInRange': ('/questions.asmx/GetQuestionsForOralAnswer_TabledInRange_JSON?startDate=%s&endDate=%s', 'json'),
	'GetQuestionsForWrittenAnswer_AnsweredInRange': ('/questions.asmx/GetQuestionsForWrittenAnswer_AnsweredInRange_JSON?startDate=%s&endDate=%s', 'json'),
	'GetQuestionsForWrittenAnswer_TabledInRange': ('/questions.asmx/GetQuestionsForWrittenAnswer_TabledInRange_JSON?startDate=%s&endDate=%s', 'json'),
	'GetWrittenAnswerHtml': ('/questions.asmx/GetWrittenAnswerHtml?documentId=%s%s', 'html'),

	# Organisation methods
	'GetAllPartyGroupsListCurrent': ('/organisations.asmx/GetAllPartyGroupsListCurrent_JSON?%s%s', 'json'),
	'GetCommitteesListCurrent_AdHoc': ('/organisations.asmx/GetCommitteesListCurrent_AdHoc_JSON?%s%s', 'json'),
	'GetCommitteesListCurrent_Standing': ('/organisations.asmx/GetCommitteesListCurrent_Standing_JSON?%s%s', 'json'),
	'GetCommitteesListCurrent_Statutory': ('/organisations.asmx/GetCommitteesListCurrent_Statutory_JSON?%s%s', 'json'),
	'GetDepartmentListCurrent': ('/organisations.asmx/GetDepartmentListCurrent_JSON?%s%s', 'json'),
	'GetPartiesListCurrent': ('/organisations.asmx/GetPartiesListCurrent_JSON?%s%s', 'json'),

	}


	class Scraper(object):

		"""
		Simple wrapper for the data.niassembly.gov.uk open data APIs.

		Creating an instance performs the HTTP request straight away; the
		parsed response is then available as ``self.data``.
		"""

		# Base URL for all API calls
		base_url = 'http://data.niassembly.gov.uk'

		def __init__(self, api_method, api_arg1='', api_arg2=''):
			"""
			Call ``api_method`` and store the parsed response in ``self.data``.

			api_method -- key of the module-level ``api_methods`` table; an
			              unknown name raises KeyError.
			api_arg1   -- first value substituted into the endpoint's URL
			              template (converted with ``str``).
			api_arg2   -- second value substituted into the URL template
			              (converted with ``str``).
			"""

			# make params available to our class methods
			self.api_method = api_method
			self.api_arg1 = str(api_arg1)
			self.api_arg2 = str(api_arg2)

			# Dict mapping api types to parsing functions
			api_type = {
				'html': self.getdata_html,
				'json': self.getdata_json,
				'xml': self.getdata_xml,
			}

			# Look up the (url_template, response_type) pair, then run the
			# parser that matches the response type.
			self.api_call = api_methods[api_method]
			self.data = api_type[self.api_call[1]]()

		def getdata_raw(self):
			"""
			Perform the GET request and return the ``requests`` response object.

			The URL is ``base_url`` followed by the endpoint template with both
			stored arguments substituted in.
			"""
			return requests.get(self.base_url + self.api_call[0] % (self.api_arg1, self.api_arg2))

		def getdata_html(self):
			"""Return the response body as text, unparsed."""
			# TODO: Do some simple html parsing
			return self.getdata_raw().text

		def getdata_json(self):
			"""
			Return the response body decoded from JSON.

			The API wraps its JSON in JSONP-style text (``?(`` ... ``);``),
			which is removed before decoding. Raises ValueError (from
			``json.loads``) if what remains is not valid JSON.
			"""
			response_text = self._strip_jsonp(self.getdata_raw().text)
			return json.loads(response_text)

		@staticmethod
		def _strip_jsonp(text):
			"""
			Remove a leading ``?(`` and a trailing ``);`` from ``text``.

			Only the outermost wrapper is removed. A blanket ``str.replace``
			would also delete these character sequences when they occur inside
			JSON string values (e.g. question text such as ``"...(why?); ..."``)
			and silently corrupt the data.
			"""
			body = text.strip()
			if body.startswith('?('):
				body = body[len('?('):]
			if body.endswith(');'):
				body = body[:-len(');')]
			return body

		def getdata_xml(self):
			"""
			Return the XML response body converted to plain nested dicts/lists.
			"""
			# transliterate to ASCII first; the original author notes that
			# xmltodict is picky about unicode
			response_text = unidecode.unidecode(self.getdata_raw().text)
			# parse the xml into an ordered dict
			xml_dict = xmltodict.parse(response_text)
			# we don't really want an ordered dict, so convert to standard
			# dicts by dumping to and reparsing from JSON (odd-looking, but the
			# simplest way to deal with arbitrary levels of nesting)
			return json.loads(json.dumps(xml_dict))


	if __name__ == '__main__':

		# Simple example usage: each Scraper(...) call hits the live API and the
		# parsed result is pretty-printed as JSON.
		# print(...) with a single argument behaves the same on Python 2 and 3,
		# whereas the bare print statement is a SyntaxError on Python 3.
		scraper = Scraper('GetMemberRolesByPersonId', 108)
		print(json.dumps(scraper.data, indent=2))

		scraper = Scraper('GetAllCurrentMembers')
		print(json.dumps(scraper.data, indent=2))