This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_ngram_model(n, path):
    """
    Build and train an NgramModel of order *n* from a plain-text file.

    :param n: ngram order, passed through to NgramModel
    :param path: filesystem path to the training text file
    :return: the trained NgramModel instance
    """
    m = NgramModel(n)
    with open(path, 'r') as f:
        text = f.read()
    # Split on '.' and restore the fullstop per sentence.  str.split
    # yields an empty trailing fragment whenever the text ends with '.'
    # (and may yield whitespace-only fragments between consecutive dots);
    # appending '.' to those would feed a bogus "." sentence into the
    # model, so skip them.
    for sentence in text.split('.'):
        if not sentence.strip():
            continue
        # add back the fullstop
        m.update(sentence + '.')
    return m
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def random_token(self, context): | |
""" | |
Given a context we "semi-randomly" select the next word to append in a sequence | |
:param context: | |
:return: | |
""" | |
r = random.random() | |
map_to_probs = {} | |
token_of_interest = self.context[context] | |
for token in token_of_interest: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def prob(self, context, token): | |
""" | |
Calculates probability of a candidate token to be generated given a context | |
:return: conditional probability | |
""" | |
try: | |
count_of_token = self.ngram_counter[(context, token)] | |
count_of_context = float(len(self.context[context])) | |
result = count_of_token / count_of_context |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NgramModel(object):
    """Holds the state of a simple n-gram language model."""

    def __init__(self, n):
        """
        :param n: order of the ngrams tracked by this model
        """
        self.n = n
        # context tuple -> list of candidate follow-up tokens
        self.context = {}
        # (context, token) ngram -> number of times seen in the text so far
        self.ngram_counter = {}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def tokenize(text: str) -> List[str]:
    """
    Split *text* into tokens, treating each punctuation mark as its own token.

    :param text: input sentence
    :return: list of word and punctuation tokens
    """
    # Pad every punctuation character with surrounding spaces in a single
    # C-level pass, then let str.split collapse the whitespace.
    pad = str.maketrans({p: ' ' + p + ' ' for p in string.punctuation})
    return text.translate(pad).split()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
class Markov(object):
    """Markov-chain text state built from an open corpus file."""

    def __init__(self, open_file):
        """
        :param open_file: an already-opened file object holding the corpus
        """
        self.cache = {}
        self.open_file = open_file
        # NOTE(review): file_to_words() and database() are defined elsewhere
        # in this class; presumably they read self.open_file and populate
        # self.cache — confirm against the full class body.
        self.words = self.file_to_words()
        self.word_size = len(self.words)
        self.database()