Skip to content

Instantly share code, notes, and snippets.

@bobmurder
Created October 15, 2012 16:31
Show Gist options
  • Save bobmurder/3893436 to your computer and use it in GitHub Desktop.
Save bobmurder/3893436 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
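Markov chain text generator: builds a table of two-word prefixes and the
words that follow them in a text file, then generates a random word sequence.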
"""
from collections import defaultdict
import itertools
import random
import sys
# constants: end-of-text marker and input file name
sentinel = '\n'
fname = 'kjv.txt'
def file_to_list(fname):
    """Read a text file and return its whitespace-separated words.

    Args:
        fname: Path of the text file to read.

    Returns:
        A list of the words in the file, in file order. Line breaks and
        runs of whitespace act as separators, so the list never contains
        empty strings; an empty file gives an empty list.
    """
    collected = []
    with open(fname) as f:
        # Split line by line so the whole text is never joined into one
        # large intermediate string before being split again.
        for line in f:
            collected.extend(line.split())
    return collected
# Append the sentinel as one end-of-text marker. A list literal is used
# rather than list(sentinel), which would split a multi-character sentinel
# into separate characters.
words = file_to_list(fname) + [sentinel]
def triplets(words):
    """Yield ``((first, second), third)`` for each run of three words.

    Iteration stops as soon as the module-level ``sentinel`` appears in
    one of the first two positions, so the sentinel can only ever be
    yielded as the third element of a triple.

    Args:
        words: A sequence of words (it is traversed three times in
            parallel, so a one-shot iterator will not work).

    Yields:
        ``((words[i], words[i + 1]), words[i + 2])`` for consecutive ``i``.
        Inputs with fewer than three items yield nothing.
    """
    # zip() ends when the shortest stream runs out, so short input or a
    # missing trailing sentinel ends iteration instead of raising IndexError.
    shifted_once = itertools.islice(words, 1, None)
    shifted_twice = itertools.islice(words, 2, None)
    for first, second, third in zip(words, shifted_once, shifted_twice):
        if sentinel in (first, second):
            break
        yield ((first, second), third)
# NOTE: this rebinds the name ``triplets`` from the generator function to
# the generator object it returns, so the function can no longer be called
# by that name after this line.
triplets = triplets(words)
def make_chains(triplets):
    """Group suffix words by the prefix that precedes them.

    Args:
        triplets: Iterable of ``(prefix, suffix)`` pairs, where ``prefix``
            is hashable (it is used as a dictionary key).

    Returns:
        A ``defaultdict(list)`` mapping each prefix to the list of its
        suffixes in the order they were seen. Duplicates are kept, so a
        uniform random pick from a list is weighted by how often each
        suffix followed that prefix.
    """
    chains = defaultdict(list)
    for prefix, suffix in triplets:
        chains[prefix].append(suffix)
    return chains
# Build the prefix -> following-words table; this consumes the ``triplets``
# generator.
chains = make_chains(triplets)
def generate(words, chains, prefix, max_length=10000):
    """Yield randomly chosen words by walking the chain table.

    Starting from ``prefix``, a follower is picked at random from
    ``chains[prefix]``, yielded, and the prefix is shifted to
    ``(prefix[1], word)``.

    Args:
        words: Unused; kept so existing callers keep working.
        chains: Mapping from a two-word tuple to the list of words that
            may follow it.
        prefix: Two-word tuple to start from.
        max_length: Length budget. At most ``max_length - 3`` words are
            yielded; the script's entry point seeds its output list with
            the sentinel and the two prefix words before calling this.

    Yields:
        One word at a time until the module-level ``sentinel`` is part of
        the current prefix, the budget is used up, or the current prefix
        has no recorded followers.
    """
    budget = max_length - 3
    produced = 0
    while sentinel not in prefix and produced < budget:
        # .get() instead of indexing: indexing a defaultdict would insert
        # an empty list for an unseen prefix, and random.choice() raises
        # IndexError on an empty list.
        followers = chains.get(prefix)
        if not followers:
            break
        word = random.choice(followers)
        yield word
        produced += 1
        prefix = (prefix[1], word)
if __name__ == '__main__':
    # Optional first command-line argument: the length budget passed to
    # generate(). Check the argument count first, because indexing
    # sys.argv[1] raises IndexError when no argument was given.
    if len(sys.argv) > 1 and sys.argv[1]:
        max_length = int(sys.argv[1])
    else:
        max_length = 1000
    prefix = tuple(words[:2])
    # Seed the output with the sentinel followed by the starting prefix.
    # The sentinel is added as one item rather than iterated character by
    # character.
    output = [sentinel]
    output.extend(prefix)
    output.extend(generate(words, chains, prefix, max_length))
    # A single parenthesised argument prints the same on Python 2 and 3.
    print('%s words generated' % len(output))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment