Skip to content

Instantly share code, notes, and snippets.

@makmac213
Created December 12, 2021 15:36
Show Gist options
  • Save makmac213/43a05c3c5b88c9ef55e467705908a410 to your computer and use it in GitHub Desktop.
Save makmac213/43a05c3c5b88c9ef55e467705908a410 to your computer and use it in GitHub Desktop.
Auto-suggest next word
import re
from nltk import (
FreqDist,
ngrams,
word_tokenize
)
def tokenize_phrases(phrases):
    """Clean each phrase and split it into lowercase word tokens.

    Every run of characters other than ASCII letters, spaces and newlines is
    removed from a phrase before it is handed to ``word_tokenize``; the
    resulting tokens are then lowercased.

    Args:
        phrases: Iterable of strings to tokenize.

    Returns:
        A list containing one list of lowercase tokens per input phrase, in
        the same order as ``phrases``.
    """
    # Compile once up front; the same pattern is applied to every phrase.
    non_letters = re.compile(r'[^a-zA-Z \n]+')
    ret = []
    for phrase in phrases:
        cleaned = non_letters.sub('', phrase)
        # Tokenize the cleaned text (not the raw phrase) so punctuation and
        # digits never show up as tokens.
        ret.append([word.lower() for word in word_tokenize(cleaned)])
    return ret
def get_bigram_dict(phrases):
    """Map each token to the distinct tokens that directly follow it.

    Args:
        phrases: Iterable of token sequences (for example the output of
            ``tokenize_phrases``). Tokens must be hashable.

    Returns:
        A dict whose keys are tokens that have at least one successor and
        whose values are lists of the distinct tokens seen immediately after
        the key. Keys and followers both appear in first-seen order. Phrases
        with fewer than two tokens contribute nothing.
    """
    bigram_dict = {}
    seen = set()  # bigrams already recorded, for O(1) duplicate checks
    for phrase in phrases:
        # Materialize so any iterable can be paired with its shifted self.
        words = list(phrase)
        for bigram in zip(words, words[1:]):
            if bigram in seen:
                continue
            seen.add(bigram)
            first, second = bigram
            bigram_dict.setdefault(first, []).append(second)
    return bigram_dict
def suggest_next_word(curr_word, bigram_dict):
    """Return the words that may follow ``curr_word``.

    Args:
        curr_word: The word to look up.
        bigram_dict: Mapping of word -> list of follower words, as built by
            ``get_bigram_dict``.

    Returns:
        A list of the distinct followers of ``curr_word`` in the order they
        are stored in ``bigram_dict``. If ``curr_word`` is not a key of
        ``bigram_dict``, the empty string ``""`` is returned instead.
    """
    try:
        followers = bigram_dict[curr_word]
    except KeyError:
        # The empty string is kept as the "no suggestion" value so existing
        # callers that compare against "" keep working.
        return ""
    # dict.fromkeys drops duplicates while keeping insertion order, so the
    # result is the same on every run (a set's order is hash-dependent).
    return list(dict.fromkeys(followers))
# Demo: build a bigram lookup from a small sample corpus and query it.
# Sample phrases used as the training corpus for the lookup table.
phrases = [
"thank you very much",
"goodbye and thank you",
"best wishes",
"good to see you",
"nice to see you",
"looking forward to work with you",
]
# Tokenize the corpus, then build the word -> followers table from the tokens.
tokens = tokenize_phrases(phrases)
bigram_dict = get_bigram_dict(tokens)
# Example lookups. These are bare expressions, so their results are discarded
# when the file runs as a script; presumably they were meant to be evaluated
# in an interactive session or notebook -- TODO confirm.
# Assuming the phrases tokenize on whitespace, the followers in this corpus
# are: 'you' -> very; 'to' -> see, work; 'looking' -> forward.
suggest_next_word('you', bigram_dict)
suggest_next_word('to', bigram_dict)
suggest_next_word('looking', bigram_dict)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment