Last active
August 29, 2015 14:15
-
-
Save looselycoupled/4b21c022ca8744eb1d01 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Model answer for REST workshop using OpenCalais | |
""" | |
########################################################################## | |
## Imports | |
########################################################################## | |
import requests | |
########################################################################## | |
## Module Variables/Constants | |
########################################################################## | |
# Department of Justice press release endpoint (JSON); the query string asks
# for a page size of 5.
DOJ_RELEASES_URL = "http://www.justice.gov/api/v1/press_releases.json?pagesize=5"

# Open Calais enrichment endpoint that receives the press release text.
OPEN_CALAIS_SERVICE_URL = "http://api.opencalais.com/tag/rs/enrich"

# Placeholder: replace with a real Open Calais license key before running.
# The value is sent in the 'x-calais-licenseID' request header.
OPEN_CALAIS_KEY = "ADD_API_KEY_HERE"
########################################################################## | |
## Functions | |
########################################################################## | |
def fetch_press_releases(timeout=30):
    """
    Performs a GET on the DOJ web service and returns the array found in the
    'results' attribute of the JSON response.

    timeout -- seconds to wait for the server before giving up; passed
               straight through to requests.get.

    Raises requests.exceptions.Timeout if the server does not answer in time,
    requests.exceptions.HTTPError on a 4xx/5xx response, and KeyError if the
    decoded JSON has no 'results' attribute.
    """
    # execute a GET request; requests waits indefinitely unless a timeout
    # is given, so always bound the call
    response = requests.get(DOJ_RELEASES_URL, timeout=timeout)

    # fail with a clear HTTP error instead of trying to decode an error page
    response.raise_for_status()

    # decode as json and return the 'results' array of press releases
    return response.json()['results']
def fetch_semantic_entities(content, timeout=30):
    """
    Performs a POST on the Open Calais service using the supplied text and
    then returns the decoded response as a dict.

    content -- the text to analyse; unicode text is encoded as UTF-8, a byte
               string is sent as-is.
    timeout -- seconds to wait for the server before giving up; passed
               straight through to requests.post.

    Raises requests.exceptions.Timeout if the server does not answer in time
    and requests.exceptions.HTTPError on a 4xx/5xx response.
    """
    # setup a dict to hold the header values
    headers = {
        'x-calais-licenseID': OPEN_CALAIS_KEY,
        'accept': 'application/json',
        'content-type': 'TEXT/HTML',
    }

    # ensure the content is encoded for utf-8; calling .encode() on a byte
    # string would fail (implicit ASCII decode on Python 2, no such method
    # on Python 3), so bytes are passed through untouched
    if isinstance(content, bytes):
        encoded_content = content
    else:
        encoded_content = content.encode('utf-8')

    # execute a POST to the Open Calais service, bounded by the timeout
    response = requests.post(OPEN_CALAIS_SERVICE_URL,
                             data=encoded_content,
                             headers=headers,
                             timeout=timeout)

    # fail with a clear HTTP error instead of trying to decode an error page
    response.raise_for_status()

    # decode as json and return a dict
    return response.json()
def print_entities(data):
    """
    Iterates through a dict and prints out the Type and Name of an
    entity if the appropriate attributes were found.

    data -- dict whose values are expected to be dicts; a value is printed
            as '<_type>: <name>' only when its '_typeGroup' is 'entities'
            and both '_type' and 'name' are present.  Everything else is
            skipped silently.
    """
    # only the values are needed; the keys are never used
    for value in data.values():
        # skip anything that is not a dict so lookups below cannot fail
        if not isinstance(value, dict):
            continue

        # skip entries that are not entities
        if value.get('_typeGroup') != 'entities':
            continue

        # skip entities that lack either attribute instead of raising KeyError
        if '_type' not in value or 'name' not in value:
            continue

        # single-argument print(...) behaves the same on Python 2 and 3
        print('%s: %s' % (value['_type'], value['name']))
def main():
    """
    Main execution function to perform required actions: fetches the press
    releases, sends the body of each one to Open Calais and prints the
    title followed by the entities that were found.
    """
    # fetch array of press releases and iterate over them
    for release in fetch_press_releases():
        # fetch entities using Open Calais API; done before printing so a
        # failed request does not leave a dangling title in the output
        entities = fetch_semantic_entities(release['body'])

        # print the title and entities found; single-argument print(...)
        # behaves the same on Python 2 and 3
        print('\n' + release['title'])
        print_entities(entities)
########################################################################## | |
## Execution | |
########################################################################## | |
# run the workflow only when executed as a script, not when imported
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment