# makmac213/diktador.py

## diktador.py
import re
from nltk import (
    FreqDist,
    ngrams,
    word_tokenize
)

def tokenize_phrases(phrases):
    """Tokenize each phrase into a list of lower-cased tokens.

    Every character other than ASCII letters, spaces and newlines is
    removed from a phrase before it is passed to ``word_tokenize``, so
    punctuation and digits never show up as tokens.

    Args:
        phrases: Iterable of strings.

    Returns:
        A list with one entry per phrase; each entry is the list of
        lower-cased tokens of that phrase, in order.
    """
    ret = []
    for phrase in phrases:
        words = re.sub(r'[^a-zA-Z \n]+', '', phrase)
        # Tokenize the cleaned text rather than the raw phrase; otherwise
        # the characters stripped above would come back as tokens.
        ret.append([word.lower() for word in word_tokenize(words)])
    return ret

def get_bigram_dict(phrases):
    """Map each token to the tokens observed directly after it.

    Args:
        phrases: Iterable of token sequences, e.g. the output of
            ``tokenize_phrases``.

    Returns:
        A dict keyed by the first token of every bigram found; each value
        is the list of second tokens of the distinct bigrams that start
        with that key.
    """
    counts = FreqDist()
    for tokens in phrases:
        counts.update(ngrams(tokens, 2))

    # Iterating the distribution yields each distinct bigram once, so a
    # follower is listed at most once per leading token.
    followers = {}
    for first, second in counts:
        followers.setdefault(first, []).append(second)
    return followers

def suggest_next_word(curr_word, bigram_dict):
    """Return the distinct words recorded as following ``curr_word``.

    Args:
        curr_word: The word to look up.
        bigram_dict: Mapping from a word to a list of follower words.

    Returns:
        A list of the unique followers in the order they first appear in
        ``bigram_dict[curr_word]``, or the empty string ``""`` when
        ``curr_word`` is not a key. The ``""`` miss value is kept for
        backward compatibility; like ``[]`` it is falsy and iterates as
        empty.
    """
    try:
        followers = bigram_dict[curr_word]
    except KeyError:
        return ""
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # suggestions come back in the same order on every run (the order of a
    # set of strings changes with hash randomisation).
    return list(dict.fromkeys(followers))

# Sample corpus the bigram table is built from.
phrases = [
    "thank you very much",
    "goodbye and thank you",
    "best wishes",
    "good to see you",
    "nice to see you",
    "looking forward to work with you",
]

# Runs at import time: tokenize the corpus, then collect its bigrams.
tokens = tokenize_phrases(phrases)
bigram_dict = get_bigram_dict(tokens)
# Example lookups; the return values are not stored or printed here.
suggest_next_word('you', bigram_dict)
suggest_next_word('to', bigram_dict)
suggest_next_word('looking', bigram_dict)
	import re
	from nltk import (
	FreqDist,
	ngrams,
	word_tokenize
	)

	def tokenize_phrases(phrases):
	    """Tokenize each phrase into a list of lower-cased tokens.

	    Every character other than ASCII letters, spaces and newlines is
	    removed from a phrase before it is passed to ``word_tokenize``, so
	    punctuation and digits never show up as tokens.

	    Args:
	        phrases: Iterable of strings.

	    Returns:
	        A list with one entry per phrase; each entry is the list of
	        lower-cased tokens of that phrase, in order.
	    """
	    ret = []
	    for phrase in phrases:
	        words = re.sub(r'[^a-zA-Z \n]+', '', phrase)
	        # Tokenize the cleaned text rather than the raw phrase; otherwise
	        # the characters stripped above would come back as tokens.
	        ret.append([word.lower() for word in word_tokenize(words)])
	    return ret

	def get_bigram_dict(phrases):
	    """Map each token to the tokens observed directly after it.

	    Args:
	        phrases: Iterable of token sequences, e.g. the output of
	            ``tokenize_phrases``.

	    Returns:
	        A dict keyed by the first token of every bigram found; each
	        value is the list of second tokens of the distinct bigrams that
	        start with that key.
	    """
	    fd = FreqDist()
	    for phrase in phrases:
	        for bigram in ngrams(phrase, 2):
	            fd[bigram] += 1
	    bigram_dict = {}
	    # Iterating the distribution yields each distinct bigram once.
	    for first, second in fd:
	        bigram_dict.setdefault(first, []).append(second)
	    return bigram_dict

	def suggest_next_word(curr_word, bigram_dict):
	    """Return the distinct words recorded as following ``curr_word``.

	    Args:
	        curr_word: The word to look up.
	        bigram_dict: Mapping from a word to a list of follower words.

	    Returns:
	        A list of the unique followers in the order they first appear
	        in ``bigram_dict[curr_word]``, or the empty string ``""`` when
	        ``curr_word`` is not a key. The ``""`` miss value is kept for
	        backward compatibility; like ``[]`` it is falsy and iterates as
	        empty.
	    """
	    try:
	        followers = bigram_dict[curr_word]
	    except KeyError:
	        return ""
	    # dict.fromkeys de-duplicates while keeping first-seen order, so the
	    # suggestions come back in the same order on every run (the order of
	    # a set of strings changes with hash randomisation).
	    return list(dict.fromkeys(followers))

	# Sample corpus the bigram table is built from.
	phrases = [
	"thank you very much",
	"goodbye and thank you",
	"best wishes",
	"good to see you",
	"nice to see you",
	"looking forward to work with you",
	]

	# Tokenize the corpus, then collect its bigrams.
	tokens = tokenize_phrases(phrases)
	bigram_dict = get_bigram_dict(tokens)
	# Example lookups; the return values are not stored or printed here.
	suggest_next_word('you', bigram_dict)
	suggest_next_word('to', bigram_dict)
	suggest_next_word('looking', bigram_dict)