Skip to content

Instantly share code, notes, and snippets.

@tomasonjo
Created December 27, 2022 21:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomasonjo/8bd7328b8b315d5fdb2e6df6dab62d94 to your computer and use it in GitHub Desktop.
Save tomasonjo/8bd7328b8b315d5fdb2e6df6dab62d94 to your computer and use it in GitHub Desktop.
def extract_keywords(text, *, pipeline=None):
    """Extract the unique keywords found in *text*.

    The text is passed to a token-classification callable and the tokens
    it returns are stitched back together into whole keywords.

    Args:
        text: The text to extract keywords from.
        pipeline: Optional callable mapping ``text`` to an iterable of
            token dicts carrying ``'entity'`` and ``'word'`` keys. When
            omitted, the module-level ``nlp`` callable is used.

    Returns:
        A list of unique keywords in order of first appearance. The list
        is empty when the pipeline yields no tokens.
    """
    if pipeline is None:
        pipeline = nlp

    keywords = []
    current = ""
    for token in pipeline(text):
        word = token['word']
        if token['entity'] == 'I-KEY':
            if word.startswith("##"):
                # A "##" prefix is treated as a sub-word continuation:
                # drop the marker and glue the piece on without a space.
                current += word[2:]
            elif current:
                current += f" {word}"
            else:
                # Continuation with nothing before it: starting the keyword
                # here avoids emitting a leading space.
                current = word
        else:
            # Any tag other than 'I-KEY' starts a new keyword, so flush
            # the one that was being built.
            if current:
                keywords.append(current)
            current = word
    # Flush the last keyword; skipped when no tokens were produced so an
    # empty string never ends up in the result.
    if current:
        keywords.append(current)
    # dict.fromkeys removes duplicates while keeping first-seen order,
    # which makes the output deterministic across runs.
    return list(dict.fromkeys(keywords))
# Example run on a single news sentence. The author's reported keywords are
# 'broadcom', 'cloud computing' and 'vmware' (list order may differ).
extract_keywords(
    "\n"
    "Broadcom agreed to acquire cloud computing company VMware "
    "in a $61 billion (€57bn) cash-and stock deal."
    "\n"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment