Skip to content

Instantly share code, notes, and snippets.

@bobmurder
Created October 15, 2012 15:39
Show Gist options
  • Save bobmurder/3893159 to your computer and use it in GitHub Desktop.
Save bobmurder/3893159 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
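Markov chain text generator.

Learns which word follows each pair of consecutive words in a text file,
then prints a random walk through those word chains.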
"""
from collections import defaultdict
import itertools
import random
import sys
# End-of-text marker appended to the word list. file_to_list() splits on
# whitespace, so a bare newline can never collide with a real word.
sentinel = '\n'
def file_to_list(fname):
    """Return every whitespace-separated word in the file *fname*, in order.

    Lines are stripped and joined with single spaces before splitting, so
    blank lines and runs of whitespace never produce empty words.

    Raises whatever ``open`` raises if *fname* cannot be read.
    """
    with open(fname) as f:
        output = ' '.join(line.strip() for line in f)
    return output.split()
# Read the corpus named on the command line and append the sentinel value.
# `fname` was previously never bound, so this line raised NameError.
if len(sys.argv) < 2:
    sys.exit('usage: %s FILE' % sys.argv[0])
fname = sys.argv[1]
# [sentinel] rather than list(sentinel): list() would split a multi-character
# sentinel into separate one-character entries.
words = file_to_list(fname) + [sentinel]
def triplets(words):
    """Yield ``((w0, w1), w2)`` for each run of three consecutive words.

    Iteration stops as soon as the module-level ``sentinel`` appears in the
    two-word prefix. The sentinel may still be yielded as the suffix, which
    is how a chain records that the text ends there.

    A list too short to form a triplet (including a list holding only the
    sentinel, or one with no sentinel at all) yields nothing instead of
    raising IndexError.
    """
    # Three aligned views of the list; zip stops at the shortest one, so no
    # index can run past the end.
    for first, second, third in zip(words, words[1:], words[2:]):
        if sentinel in (first, second):
            break
        yield ((first, second), third)
# NOTE: rebinds the name `triplets` from the generator function to the
# generator it returns, so the function cannot be called again afterwards.
triplets = triplets(words)
def make_chains(triplets):
    """Group suffixes by prefix.

    *triplets* is an iterable of ``(prefix, suffix)`` pairs. Returns a
    ``defaultdict(list)`` mapping each prefix to its suffixes in the order
    they were seen. Duplicates are kept, so a suffix that follows a prefix
    more often is proportionally more likely to be drawn at random.
    """
    chains = defaultdict(list)
    for prefix, suffix in triplets:
        chains[prefix].append(suffix)
    return chains
# Map each two-word prefix to the list of words observed to follow it.
chains = make_chains(triplets)
def generate(words, chains, prefix, max_length=10000):
    """Yield randomly chosen words by walking *chains* from *prefix*.

    words      -- unused; kept so existing callers keep working.
    chains     -- mapping of two-word prefix tuple to a list of suffixes.
    prefix     -- tuple of words to start from.
    max_length -- maximum number of words to yield; ``None`` means no cap.

    The walk ends when the module-level ``sentinel`` appears in the current
    prefix, when *max_length* words have been yielded, or when the current
    prefix has no recorded suffixes.
    """
    emitted = 0
    while sentinel not in prefix:
        if max_length is not None and emitted >= max_length:
            break
        # .get() avoids inserting an empty entry into a defaultdict and lets
        # an unknown prefix end the walk instead of making random.choice
        # raise IndexError on an empty list.
        candidates = chains.get(prefix)
        if not candidates:
            break
        word = random.choice(candidates)
        yield word
        emitted += 1
        # Slide the window: drop the oldest word, append the new one.
        prefix = (prefix[1], word)
if __name__ == '__main__':
    # Seed the walk with the first two words of the corpus.
    prefix = tuple(words[:2])
    # Iterating the sentinel string yields its single character, so the
    # output starts with the sentinel followed by the seed words.
    output = list(itertools.chain(sentinel, prefix))
    output.extend(generate(words, chains, prefix))
    # The parenthesised single-argument form prints the same text under
    # Python 2 and is also valid Python 3.
    print(' '.join(output))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment