Created
April 21, 2017 20:55
-
-
Save krhoyt/e3aa96650a7dfca4bdbc5ca776013c8a to your computer and use it in GitHub Desktop.
Isolate Watson Keywords
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{
  "api": "https://gateway.watsonplatform.net/natural-language-understanding/api/v1/analyze",
  "url": "http://_CONTENT_YOU_WANT_WATSON_TO_LOOK_AT_",
  "username": "_YOUR_WATSON_API_USERNAME_",
  "password": "_YOUR_WATSON_API_PASSWORD_",
  "threshold": 0.70,
  "limit": 20
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import requests | |
# Isolate Watson keywords: ask the configured analyze endpoint for the
# keywords of a URL, break multi-word keywords into single words, and print
# each unique word with the relevance of the keyword it first appeared in.

CONFIG_PATH = 'watson.json'
API_VERSION = '2017-02-27'


def load_config(path=CONFIG_PATH):
    """Read the JSON configuration file and return it as a dictionary.

    The script reads the keys 'api', 'url', 'username', 'password',
    'threshold' and 'limit' from the returned dictionary.
    """
    with open(path) as data_file:
        return json.load(data_file)


def fetch_keywords(config):
    """Call the analyze endpoint and return its list of keyword dictionaries.

    Only the keywords feature is requested; emotion and sentiment are
    switched off. Raises requests.HTTPError when the service answers with an
    error status (for example bad credentials), so the failure is reported
    as such instead of surfacing later as a missing 'keywords' key.
    """
    params = {
        'version': API_VERSION,
        'url': config['url'],
        'features': 'keywords',
        'keywords.emotion': 'false',
        'keywords.sentiment': 'false',
        'keywords.limit': config['limit'],
    }
    response = requests.get(
        config['api'],
        auth=(config['username'], config['password']),
        params=params
    )
    response.raise_for_status()

    # A successful response without a 'keywords' entry means nothing to report
    return response.json().get('keywords', [])


def isolate_keywords(keywords, threshold):
    """Return unique single words taken from sufficiently relevant keywords.

    keywords is an iterable of dictionaries with 'text' and 'relevance'
    entries. Keywords whose relevance is not strictly greater than threshold
    are skipped. Each remaining keyword is split on whitespace, and every
    word not seen before is returned as {'relevance': ..., 'text': ...},
    carrying the relevance of the keyword it first appeared in. Input order
    is preserved.
    """
    results = []
    seen = set()

    for keyword in keywords:
        if keyword['relevance'] <= threshold:
            continue

        # split() without a separator drops the empty strings that
        # consecutive spaces would otherwise produce
        for word in keyword['text'].split():
            if word in seen:
                continue

            seen.add(word)
            results.append({
                'relevance': keyword['relevance'],
                'text': word
            })

    return results


def main():
    """Load the configuration, query the service and print the results."""
    config = load_config()
    keywords = fetch_keywords(config)

    # Here you go
    for keyword in isolate_keywords(keywords, config['threshold']):
        # Single-argument print() behaves the same on Python 2 and Python 3
        print('{0}: {1}'.format(keyword['relevance'], keyword['text']))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment