Skip to content

Instantly share code, notes, and snippets.

@looselycoupled
Last active August 29, 2015 14:15
Show Gist options
  • Save looselycoupled/4b21c022ca8744eb1d01 to your computer and use it in GitHub Desktop.
Save looselycoupled/4b21c022ca8744eb1d01 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Model answer for REST workshop using OpenCalais
"""
##########################################################################
## Imports
##########################################################################
import requests
##########################################################################
## Module Variables/Constants
##########################################################################
# DOJ press release endpoint requested by fetch_press_releases(); the query
# string asks for pagesize=5 (presumably five releases per response --
# confirm against the DOJ API documentation)
DOJ_RELEASES_URL = 'http://www.justice.gov/api/v1/press_releases.json?pagesize=5'
# Open Calais endpoint that fetch_semantic_entities() POSTs the text to
OPEN_CALAIS_SERVICE_URL = 'http://api.opencalais.com/tag/rs/enrich'
# Value sent in the 'x-calais-licenseID' request header; this is a
# placeholder and presumably must be replaced with a real Open Calais key
OPEN_CALAIS_KEY = 'ADD_API_KEY_HERE'
##########################################################################
## Functions
##########################################################################
def fetch_press_releases(timeout=30):
    """
    Performs a GET on the DOJ web service and returns the array found in the
    'results' attribute of the JSON response.

    timeout -- seconds to wait on the server before giving up; handed
               straight to requests.get

    Raises requests.exceptions.HTTPError when the service answers with a
    4xx/5xx status and KeyError when the decoded JSON has no 'results'.
    """
    # without a timeout requests waits indefinitely on an unresponsive server
    response = requests.get(DOJ_RELEASES_URL, timeout=timeout)
    # surface HTTP errors here rather than as a confusing JSON/KeyError later
    response.raise_for_status()
    # decode as json and return the 'results' array of press releases
    return response.json()['results']
def fetch_semantic_entities(content, timeout=30):
    """
    Performs a POST on the Open Calais service using the supplied text and
    then returns the decoded JSON response.

    content -- the text to analyse; text strings are encoded as utf-8 and
               byte strings are sent unchanged
    timeout -- seconds to wait on the server before giving up; handed
               straight to requests.post

    Raises requests.exceptions.HTTPError when the service answers with a
    4xx/5xx status.
    """
    # setup a dict to hold the header values
    headers = {
        'x-calais-licenseID': OPEN_CALAIS_KEY,
        'accept': 'application/json',
        'content-type': 'TEXT/HTML',
    }
    # only text needs encoding; calling .encode on data that is already bytes
    # fails (AttributeError on Python 3, implicit ascii decode on Python 2)
    if isinstance(content, bytes):
        payload = content
    else:
        payload = content.encode('utf-8')
    # execute a POST to the Open Calais service, never waiting forever
    response = requests.post(OPEN_CALAIS_SERVICE_URL, data=payload,
                             headers=headers, timeout=timeout)
    # surface HTTP errors (e.g. a rejected key) before trying to decode JSON
    response.raise_for_status()
    # decode as json and return the result
    return response.json()
def print_entities(data):
    """
    Iterates through a dict and prints out the Type and Name of an
    entity if the appropriate attributes were found.

    data -- dict whose values are inspected; only values that are dicts with
            '_typeGroup' equal to 'entities' and that carry both '_type' and
            'name' are printed, everything else is skipped

    Prints one '<type>: <name>' line per matching entry and returns None.
    """
    # the keys are never used, so walk the values only; .values() exists on
    # both Python 2 and 3 whereas .iteritems() is Python 2 only
    for entry in data.values():
        # a string value would turn "'_typeGroup' in value" into a substring
        # test and then fail on indexing, so only dicts are considered
        if not isinstance(entry, dict):
            continue
        if entry.get('_typeGroup') != 'entities':
            continue
        # skip entities missing either attribute instead of raising KeyError
        if '_type' not in entry or 'name' not in entry:
            continue
        # a parenthesised single argument prints the same on Python 2 and 3
        print('%s: %s' % (entry['_type'], entry['name']))
def main():
    """
    Main execution function to perform required actions: fetches the press
    releases, sends each release body to Open Calais and prints the release
    title followed by the entities that were found.
    """
    # iterate the array of press releases
    for release in fetch_press_releases():
        # fetch entities using Open Calais API (before printing anything, so
        # a failed request does not leave a dangling title)
        entities = fetch_semantic_entities(release['body'])
        # print the title and entities found; a parenthesised single argument
        # prints the same on Python 2 and 3
        print('\n' + release['title'])
        print_entities(entities)
##########################################################################
## Execution
##########################################################################
# run the workflow only when executed as a script, not when imported
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment