Created
January 31, 2011 00:25
-
-
Save maxcountryman/803469 to your computer and use it in GitHub Desktop.
Simple Markov chain generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Corpus used by the __main__ demo: big.txt, available at http://norvig.com/big.txt
from random import randrange, choice | |
MIN = 3  # fewest words needed to form one (first, second, follower) triple
MAX = 2  # number of words in a chain state, i.e. in a cache key


class MarkovChain(object):
    '''Builds a word-level Markov chain from a plaintext corpus.

    An instance takes the filepath of a plaintext corpus. The file is read and
    split on whitespace once, at construction time, and every consecutive pair
    of words is mapped to the list of words that follow that pair.
    '''

    def __init__(self, corpus):
        '''Read `corpus` (a filepath) and build the follower cache.'''
        with open(corpus) as f:
            self.words = f.read().split()
        self.corpus_len = len(self.words)
        # Maps (word, next_word) -> every word seen right after that pair.
        # Repeats are kept, so `choice` favours frequent followers.
        self.cache = {}
        self.traverse()

    def three(self, words):
        '''Yield every run of three consecutive words as a tuple.

        For example, assuming `words` = ['a', 'b', 'c', 'd', 'e']:
            ('a', 'b', 'c'), ('b', 'c', 'd'), ('c', 'd', 'e')

        Yields nothing when `words` holds fewer than MIN items.
        '''
        if len(words) < MIN:
            return
        for i in range(len(words) - MIN + 1):
            yield (words[i], words[i + 1], words[i + 2])

    def traverse(self):
        '''Fill `self.cache` with the followers of each word pair.'''
        for one, two, three in self.three(self.words):
            self.cache.setdefault((one, two), []).append(three)

    def _random_state(self):
        '''Return a random adjacent word pair that has at least one follower.'''
        # Excluding the last MAX positions guarantees that a third word
        # follows the pair, so the pair is always a key of `self.cache`.
        seed = randrange(self.corpus_len - MAX)
        return self.words[seed], self.words[seed + 1]

    def generate(self):
        '''Generate a Markov chain of a random length.

        Returns a string of space-separated words; the word count is drawn
        from randrange(60, 224).

        Raises ValueError when the corpus holds fewer than MIN words, since
        no (pair, follower) triple exists to start from.
        '''
        if self.corpus_len < MIN:
            raise ValueError('corpus must contain at least %d words' % MIN)

        length = randrange(60, 224)  # paragraph random range
        one, two = self._random_state()
        generation = [one, two]
        while len(generation) < length:
            followers = self.cache.get((one, two))
            if followers is None:
                # Dead end: only the final pair of the corpus can lack a
                # follower. Restart from a fresh random pair instead of
                # failing with a KeyError.
                one, two = self._random_state()
                generation.append(one)
                generation.append(two)
                continue
            one, two = two, choice(followers)
            generation.append(two)
        # A restart appends two words at once and may overshoot by one.
        return ' '.join(generation[:length])
if __name__ == '__main__':
    # Demo entry point: build a chain from big.txt in the working directory
    # and print one generated paragraph. The parenthesised single-argument
    # form of print behaves the same on Python 2 and Python 3.
    print(MarkovChain('big.txt').generate())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment