Isolate Watson Keywords
{ | |
"api": "https://gateway.watsonplatform.net/natural-language-understanding/api/v1/analyze", | |
"url": "http://_CONTENT_YOU_WANT_WATSON_TO_LOOK_AT_", | |
"username": "_YOUR_WATSON_API_USERNAME_", | |
"password": "_YOUR_WATSON_API_PASSWORD_", | |
"threshold": 0.70, | |
"limit": 20 | |
} |
import json | |
import requests | |
# Isolate single-word keywords from a Watson keyword analysis of a URL and
# print each unique word together with the relevance of the first keyword
# phrase it appeared in.

# Read configuration options
# Expected keys: api, url, username, password, threshold, limit
with open('watson.json') as data_file:
    config = json.load(data_file)

# Assemble API parameters
# Keywords only please
params = {
    'version': '2017-02-27',
    'url': config['url'],
    'features': 'keywords',
    'keywords.emotion': 'false',
    'keywords.sentiment': 'false',
    'keywords.limit': config['limit'],
}

# Call API
req = requests.get(
    config['api'],
    auth=(config['username'], config['password']),
    params=params,
)

# Surface HTTP failures (bad credentials, quota, unreachable content) here
# instead of as a confusing KeyError on the missing 'keywords' entry below
req.raise_for_status()

# JSON results to Python dictionary
res = req.json()

# Hold results in first-seen order; the set gives constant-time
# duplicate checks without re-scanning the results list
results = []
seen = set()

# Iterate found keywords
for keyword in res['keywords']:
    # Over relevance threshold
    if keyword['relevance'] > config['threshold']:
        # Debug: Original keywords and their relevance
        # print('{0}: {1}'.format(keyword['relevance'], keyword['text']))

        # Split keywords with more than one word
        # A single word splits into a list of one element
        for word in keyword['text'].split(' '):
            # Discard known keywords
            if word in seen:
                continue

            # Place into cleaned results list
            seen.add(word)
            results.append({
                'relevance': keyword['relevance'],
                'text': word,
            })

# Here you go
# Call form of print with a single argument works on Python 2 and Python 3
for keyword in results:
    print('{0}: {1}'.format(keyword['relevance'], keyword['text']))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment