dellis23/markov.py

## markov.py
import random

class Markov(object):

	def __init__(self, open_file, chain_size=3):
		self.chain_size = chain_size
		self.cache = {}
		self.open_file = open_file
		self.words = self.file_to_words()
		self.word_size = len(self.words)
		self.database()

	def file_to_words(self):
		self.open_file.seek(0)
		data = self.open_file.read()
		words = data.split()
		return words

	def words_at_position(self, i):
		"""Uses the chain size to find a list of the words at an index."""
		chain = []
		for chain_index in range(0, self.chain_size):
			chain.append(self.words[i + chain_index])
		return chain

	def chains(self):
		"""Generates chains from the given data string based on passed chain size.

		So if our string were:

			"What a lovely day"

		With a chain size of 3, we'd generate:

			(What, a, lovely)

		and

			(a, lovely, day)
		"""

		if len(self.words) < self.chain_size:
			return

		for i in range(len(self.words) - self.chain_size - 1):
			yield tuple(self.words_at_position(i))

	def database(self):
		for chain_set in self.chains():
			key = chain_set[:self.chain_size - 1]
			next_word = chain_set[-1]
			if key in self.cache:
				self.cache[key].append(next_word)
			else:
				self.cache[key] = [next_word]

	def generate_markov_text(self, size=25):
		seed = random.randint(0, self.word_size - 3)
		gen_words = []
		seed_words = self.words_at_position(seed)[:-1]
		gen_words.extend(seed_words)
		for i in xrange(size):
			last_word_len = self.chain_size - 1
			last_words = gen_words[-1 * last_word_len:]
			next_word = random.choice(self.cache[tuple(last_words)])
			gen_words.append(next_word)
		return ' '.join(gen_words)
	import random

	class Markov(object):

	def __init__(self, open_file, chain_size=3):
	self.chain_size = chain_size
	self.cache = {}
	self.open_file = open_file
	self.words = self.file_to_words()
	self.word_size = len(self.words)
	self.database()

	def file_to_words(self):
	self.open_file.seek(0)
	data = self.open_file.read()
	words = data.split()
	return words

	def words_at_position(self, i):
	"""Uses the chain size to find a list of the words at an index."""
	chain = []
	for chain_index in range(0, self.chain_size):
	chain.append(self.words[i + chain_index])
	return chain

	def chains(self):
	"""Generates chains from the given data string based on passed chain size.

	So if our string were:

	"What a lovely day"

	With a chain size of 3, we'd generate:

	(What, a, lovely)

	and

	(a, lovely, day)
	"""

	if len(self.words) < self.chain_size:
	return

	for i in range(len(self.words) - self.chain_size - 1):
	yield tuple(self.words_at_position(i))

	def database(self):
	for chain_set in self.chains():
	key = chain_set[:self.chain_size - 1]
	next_word = chain_set[-1]
	if key in self.cache:
	self.cache[key].append(next_word)
	else:
	self.cache[key] = [next_word]

	def generate_markov_text(self, size=25):
	seed = random.randint(0, self.word_size - 3)
	gen_words = []
	seed_words = self.words_at_position(seed)[:-1]
	gen_words.extend(seed_words)
	for i in xrange(size):
	last_word_len = self.chain_size - 1
	last_words = gen_words[-1 * last_word_len:]
	next_word = random.choice(self.cache[tuple(last_words)])
	gen_words.append(next_word)
	return ' '.join(gen_words)