Skip to content

Instantly share code, notes, and snippets.

@tezansahu
Last active April 23, 2022 21:32
Show Gist options
  • Save tezansahu/88e64a5d4ac18bb7fd51792d33a12325 to your computer and use it in GitHub Desktop.
Save tezansahu/88e64a5d4ac18bb7fd51792d33a12325 to your computer and use it in GitHub Desktop.
from keybert import KeyBERT
import nltk
# Make sure the NLTK corpora needed for synonym lookup (WordNet and the Open
# Multilingual Wordnet) are installed, downloading any that are missing.
for resource in ["wordnet", "omw-1.4"]:
    try:
        # Only the lookup's success matters; the returned path is not needed.
        nltk.find("corpora/{0}".format(resource))
    except LookupError:
        # nltk.find raises LookupError when the resource is absent from every
        # NLTK data directory; any other error is a real problem and should
        # surface rather than be masked by a download attempt.
        nltk.download(resource)
from nltk.corpus import wordnet
class KeywordSynonyms:
    """Extract keywords from text with KeyBERT and look up WordNet synonyms."""

    def __init__(self):
        """Load the KeyBERT model used for keyword extraction."""
        # KeyBERT model for Keyword Extraction
        print("Loading KeyBERT Model for Keyword Extraction.")
        self.keyword_extraction_model = KeyBERT()

    def extractKeywords(self, text):
        """Return the keywords the model extracts from ``text``, scores dropped.

        Args:
            text: Document to extract keywords from.

        Returns:
            A list with the first element of every item the model returns.
        """
        keywords = self.keyword_extraction_model.extract_keywords(text)
        # The output is of the format [('keyword1', 'score1'), ('keyword2', 'score2'), ...]
        return [keyword for keyword, *_ in keywords]

    def getSynonyms(self, word, max_synonyms=6):
        """Return up to ``max_synonyms`` distinct WordNet synonyms of ``word``.

        Args:
            word: Word to look up in WordNet.
            max_synonyms: Upper bound on the number of synonyms returned.

        Returns:
            A list of unique lemma names, in the order WordNet yields them,
            excluding any that equal ``word`` ignoring case.
        """
        # Multi-word synonyms contain a '_' between the words, which needs to
        # be replaced with a ' '
        lemma_names = (
            lemma.name().replace("_", " ")
            for synset in wordnet.synsets(word)
            for lemma in synset.lemmas()
        )
        return self._select_synonyms(lemma_names, word, max_synonyms)

    @staticmethod
    def _select_synonyms(candidates, word, max_synonyms):
        """De-duplicate ``candidates`` in order, drop ``word``, and truncate."""
        target = word.lower()
        # Consider those synonyms that are not the same as the original word.
        # dict.fromkeys keeps first-seen order, so the truncation below is
        # reproducible; going through a set would make the surviving subset
        # depend on per-process string hashing.
        unique = dict.fromkeys(
            name for name in candidates if name.lower() != target
        )
        return list(unique)[:max_synonyms]

    def getSynonymsForKeywords(self, text, max_synonyms=6):
        """Map each keyword extracted from ``text`` to its synonyms.

        Args:
            text: Document to extract keywords from.
            max_synonyms: Upper bound on synonyms returned per keyword.

        Returns:
            A dict from keyword to its list of synonyms; keywords for which
            no synonym was found are omitted.
        """
        kw_syn = {}
        for word in self.extractKeywords(text):
            synonyms = self.getSynonyms(word, max_synonyms)
            if synonyms:
                kw_syn[word] = synonyms
        return kw_syn
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment