Skip to content

Instantly share code, notes, and snippets.

@maxcountryman
Created January 31, 2011 00:25
Show Gist options
  • Save maxcountryman/803469 to your computer and use it in GitHub Desktop.
Save maxcountryman/803469 to your computer and use it in GitHub Desktop.
Simple Markov chain generator
# A sample corpus (big.txt) can be downloaded from http://norvig.com/big.txt
from random import randrange, choice
# Number of consecutive words that make up one (word, word, successor) triple.
# A word list shorter than this produces no triples at all.
MIN = 3
# Number of words in a prefix pair (the chain's state). It is subtracted from
# the corpus length when picking a random seed so that the seed pair is always
# followed by at least one more word.
MAX = 2


class MarkovChain(object):
    '''Generate random text from a word-level Markov chain.

    The chain's state is a pair of adjacent words; the next word is drawn at
    random from the words that followed that pair somewhere in the corpus.

    An instance of MarkovChain takes a filepath to a corpus of plaintext as
    its only argument.
    '''

    def __init__(self, corpus):
        '''Read the corpus file and build the transition table.

        corpus -- path to a plaintext file; its content is split on
                  whitespace into words.
        '''
        with open(corpus) as f:
            self.words = f.read().split()
        self.corpus_len = len(self.words)
        # Maps (word, next_word) -> list of every word seen right after it.
        self.cache = {}
        self.traverse()

    def three(self, words):
        '''Yield each run of three consecutive words as a tuple.

        For example, assuming `words` = ['a', 'b', 'c', 'd', 'e']:
        ('a', 'b', 'c'), ('b', 'c', 'd'), ('c', 'd', 'e')

        Nothing is yielded when `words` holds fewer than MIN items.
        '''
        # The range is empty when len(words) < MIN, so no explicit guard
        # is required.
        for i in range(len(words) - MIN + 1):
            yield (words[i], words[i + 1], words[i + 2])

    def traverse(self):
        '''Fill self.cache from every three-word window of self.words.'''
        for one, two, three in self.three(self.words):
            # Duplicates are kept on purpose: a successor that occurs more
            # often in the corpus is proportionally more likely to be chosen.
            self.cache.setdefault((one, two), []).append(three)

    def _seed(self):
        '''Return a random adjacent word pair that is a key of self.cache.'''
        # Stopping MAX words before the end guarantees the pair has a
        # successor, i.e. that it was recorded by traverse().
        start = randrange(0, self.corpus_len - MAX)
        return self.words[start], self.words[start + 1]

    def generate(self, length=None):
        '''Generate a Markov chain and return it as a space-joined string.

        length -- number of transitions to make; the result then contains
                  length + 1 words. When omitted, a random value from
                  randrange(60, 224) is used.

        Raises ValueError when the corpus holds fewer than MIN words, since
        no transition can be derived from it.
        '''
        if self.corpus_len < MIN:
            raise ValueError(
                'corpus must contain at least %d words' % MIN)
        if length is None:
            length = randrange(60, 224)  # paragraph random range
        one, two = self._seed()
        generation = []
        for _ in range(length):
            generation.append(one)
            followers = self.cache.get((one, two))
            if followers is None:
                # Dead end: this pair only occurs as the last two words of
                # the corpus, so nothing is known to follow it. Restart from
                # a fresh seed instead of failing with KeyError.
                one, two = self._seed()
            else:
                one, two = two, choice(followers)
        generation.append(two)
        return ' '.join(generation)
if __name__ == '__main__':
    # Script entry point: build a chain from big.txt in the current working
    # directory and print one generated paragraph. The parenthesised
    # single-argument form of print behaves the same on Python 2 and 3.
    print(MarkovChain('big.txt').generate())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment