# tezansahu/janus_paraphrasingModule_keywordSynonyms.py

## janus_paraphrasingModule_keywordSynonyms.py
from keybert import KeyBERT
import nltk

# Make sure the NLTK data packages this module relies on (WordNet and the Open
# Multilingual Wordnet) are present, downloading whichever one cannot be found.
for resource in ("wordnet", "omw-1.4"):
    try:
        # Only the success/failure of the lookup matters; the returned path is not needed.
        nltk.find("corpora/{0}".format(resource))
    except Exception:
        # Deliberately broad: any failure to locate the corpus is answered
        # with a download attempt rather than aborting the import.
        nltk.download(resource)

from nltk.corpus import wordnet

class KeywordSynonyms:
    """Extract keywords from a text with KeyBERT and look up WordNet synonyms for them."""

    def __init__(self):
        # KeyBERT model for Keyword Extraction
        print("Loading KeyBERT Model for Keyword Extraction.")
        self.keyword_extraction_model = KeyBERT()

    def extractKeywords(self, text):
        """Return the keywords extracted from ``text``, without their scores."""
        scored_keywords = self.keyword_extraction_model.extract_keywords(text)
        # The output is of the format [('keyword1', 'score1'), ('keyword2', 'score2'), ...]
        return [entry[0] for entry in scored_keywords]

    def getSynonyms(self, word, max_synonyms=6):
        """Return up to ``max_synonyms`` distinct WordNet synonyms of ``word``.

        Synonyms are listed in the order WordNet yields them (synset by synset,
        lemma by lemma), so repeated calls give the same result. The word
        itself (compared case-insensitively) is never part of the result.
        """
        candidates = (
            # Multi-word synonyms contain a '_' between the words, which needs to be replaced with a ' '
            lemma.name().replace("_", " ")
            for synset in wordnet.synsets(word)
            for lemma in synset.lemmas()
        )
        return self._uniqueSynonyms(candidates, word, max_synonyms)

    @staticmethod
    def _uniqueSynonyms(candidates, word, max_synonyms):
        """De-duplicate ``candidates`` in first-seen order, drop ``word`` itself, and cap the count."""
        lowered_word = word.lower()
        # dict.fromkeys removes duplicates while keeping insertion order; going
        # through a set here would make the truncated result depend on string
        # hash randomisation and therefore vary between interpreter runs.
        distinct = dict.fromkeys(
            candidate for candidate in candidates if candidate.lower() != lowered_word
        )
        return list(distinct)[:max_synonyms]

    def getSynonymsForKeywords(self, text, max_synonyms=6):
        """Map each keyword of ``text`` to its synonyms, omitting keywords that have none."""
        kw_syn = {}
        for word in self.extractKeywords(text):
            synonyms = self.getSynonyms(word, max_synonyms)
            if synonyms:
                kw_syn[word] = synonyms

        return kw_syn
	from keybert import KeyBERT
	import nltk

	# Make sure the NLTK data packages this module relies on (WordNet and the Open
	# Multilingual Wordnet) are present, downloading whichever one cannot be found.
	for resource in ("wordnet", "omw-1.4"):
	    try:
	        # Only the success/failure of the lookup matters; the returned path is not needed.
	        nltk.find("corpora/{0}".format(resource))
	    except Exception:
	        # Deliberately broad: any failure to locate the corpus is answered
	        # with a download attempt rather than aborting the import.
	        nltk.download(resource)

	from nltk.corpus import wordnet

	class KeywordSynonyms:
	    """Extract keywords from a text with KeyBERT and look up WordNet synonyms for them."""

	    def __init__(self):
	        # KeyBERT model for Keyword Extraction
	        print("Loading KeyBERT Model for Keyword Extraction.")
	        self.keyword_extraction_model = KeyBERT()

	    def extractKeywords(self, text):
	        """Return the keywords extracted from ``text``, without their scores."""
	        scored_keywords = self.keyword_extraction_model.extract_keywords(text)
	        # The output is of the format [('keyword1', 'score1'), ('keyword2', 'score2'), ...]
	        return [entry[0] for entry in scored_keywords]

	    def getSynonyms(self, word, max_synonyms=6):
	        """Return up to ``max_synonyms`` distinct WordNet synonyms of ``word``.

	        Synonyms are listed in the order WordNet yields them (synset by synset,
	        lemma by lemma), so repeated calls give the same result. The word
	        itself (compared case-insensitively) is never part of the result.
	        """
	        candidates = (
	            # Multi-word synonyms contain a '_' between the words, which needs to be replaced with a ' '
	            lemma.name().replace("_", " ")
	            for synset in wordnet.synsets(word)
	            for lemma in synset.lemmas()
	        )
	        return self._uniqueSynonyms(candidates, word, max_synonyms)

	    @staticmethod
	    def _uniqueSynonyms(candidates, word, max_synonyms):
	        """De-duplicate ``candidates`` in first-seen order, drop ``word`` itself, and cap the count."""
	        lowered_word = word.lower()
	        # dict.fromkeys removes duplicates while keeping insertion order; going
	        # through a set here would make the truncated result depend on string
	        # hash randomisation and therefore vary between interpreter runs.
	        distinct = dict.fromkeys(
	            candidate for candidate in candidates if candidate.lower() != lowered_word
	        )
	        return list(distinct)[:max_synonyms]

	    def getSynonymsForKeywords(self, text, max_synonyms=6):
	        """Map each keyword of ``text`` to its synonyms, omitting keywords that have none."""
	        kw_syn = {}
	        for word in self.extractKeywords(text):
	            synonyms = self.getSynonyms(word, max_synonyms)
	            if synonyms:
	                kw_syn[word] = synonyms

	        return kw_syn