Created
January 20, 2019 12:30
-
-
Save kassuts/28fb2947ce487fe8a4202d1c47bf8ae7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def enrich_keywords(all_articles, all_keywords, threshold=0.8, keyword_limit=10):
    """Add API-extracted concept keywords to each article's keyword list.

    For every article, the title and abstract are joined into one text and
    sent to ``aylien_api_connect``. For each concept in the response, the
    first surface form is taken as a candidate keyword; it is capitalized
    and appended to the article's keyword list when its score is above
    ``threshold`` and the list does not already contain it.

    Args:
        all_articles: Mapping of article key -> sequence whose item 0 is the
            title and item 1 is the abstract (both strings).
        all_keywords: Mapping of article key -> list of keyword strings.
            The lists are extended in place.
        threshold: A candidate is accepted only if its score is strictly
            greater than this value.
        keyword_limit: Maximum number of keywords added per article.
            NOTE(review): the counter used to be shared by all articles, so
            only the first ``keyword_limit`` additions overall ever happened;
            it is now reset for each article - confirm this is the intent.

    Returns:
        The same ``all_keywords`` mapping that was passed in.

    Raises:
        KeyError: If an article that yields an accepted candidate has no
            entry in ``all_keywords``, or the API response lacks the
            expected ``'concepts'`` / ``'surfaceForms'`` / ``'string'`` /
            ``'score'`` keys.
    """
    for key, value in all_articles.items():
        # A fresh dict per request, so nothing leaks between API calls.
        params = {'text': value[0] + ". " + value[1]}
        content = aylien_api_connect(params=params)

        added = 0  # Keywords added for this article only.
        for concept in content['concepts'].values():
            if added >= keyword_limit:
                # Cap reached: the remaining concepts cannot be added anyway.
                break

            surface_forms = concept['surfaceForms']
            if not surface_forms:
                # No surface form means there is no candidate keyword.
                continue

            top_form = surface_forms[0]
            if top_form['score'] <= threshold:
                continue

            new_keyword = top_form['string'].capitalize()
            existing = all_keywords[key]
            # Compare case-insensitively: the stored form is capitalized, so
            # checking the raw surface form would let duplicates through.
            # Assumes the existing keywords are strings.
            wanted = new_keyword.lower()
            if any(keyword.lower() == wanted for keyword in existing):
                continue

            existing.append(new_keyword)
            added += 1
    return all_keywords
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment