Isolate Watson Keywords
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
    "api": "https://gateway.watsonplatform.net/natural-language-understanding/api/v1/analyze",
    "url": "http://_CONTENT_YOU_WANT_WATSON_TO_LOOK_AT_",
    "username": "_YOUR_WATSON_API_USERNAME_",
    "password": "_YOUR_WATSON_API_PASSWORD_",
    "threshold": 0.70,
    "limit": 20
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
def isolate_keywords(keywords, threshold):
    """Split relevant keyword phrases into unique single words.

    Args:
        keywords: Iterable of dicts, each with a 'text' string and a
            numeric 'relevance' (the entries of the API response's
            'keywords' list).
        threshold: Only keywords whose relevance is strictly greater than
            this value are kept.

    Returns:
        A list of {'relevance': ..., 'text': ...} dicts, one per distinct
        word, in first-seen order. Each word carries the relevance of the
        first keyword phrase it appeared in.
    """
    isolated = []
    # Words already emitted; a set keeps the duplicate check O(1) instead
    # of rescanning the results list for every word.
    seen = set()

    # Iterate found keywords
    for keyword in keywords:
        # Over relevance threshold
        if keyword['relevance'] > threshold:
            # split() with no argument handles single words and phrases
            # alike, and never yields empty strings for repeated, leading
            # or trailing whitespace.
            for word in keyword['text'].split():
                # Discard known keywords
                if word in seen:
                    continue
                seen.add(word)
                # Place into cleaned results list
                isolated.append({
                    'relevance': keyword['relevance'],
                    'text': word
                })

    return isolated


def main():
    """Fetch keywords for the configured URL and print the isolated words.

    Reads 'watson.json' from the current directory. It must provide 'api',
    'url', 'username', 'password', 'threshold' and 'limit'; an optional
    'timeout' (seconds, default 30) bounds the HTTP request.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    # Read configuration options
    with open('watson.json') as data_file:
        config = json.load(data_file)

    # Assemble API parameters
    # Keywords only please
    params = {
        'version': '2017-02-27',
        'url': config['url'],
        'features': 'keywords',
        'keywords.emotion': 'false',
        'keywords.sentiment': 'false',
        'keywords.limit': config['limit']
    }

    # Call API
    # Without a timeout a stalled connection would block forever.
    response = requests.get(
        config['api'],
        auth=(config['username'], config['password']),
        params=params,
        timeout=config.get('timeout', 30)
    )
    # Fail with the HTTP error itself rather than a KeyError on 'keywords'
    # further down when the service rejects the request.
    response.raise_for_status()

    # JSON results to Python dictionary
    res = response.json()

    # Here you go
    for keyword in isolate_keywords(res.get('keywords', []),
                                    config['threshold']):
        # Single-argument print() is valid on both Python 2 and Python 3.
        print('{0}: {1}'.format(keyword['relevance'], keyword['text']))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment