Skip to content

Instantly share code, notes, and snippets.

@justinvanwinkle
Last active December 21, 2015 13:48
Show Gist options
  • Save justinvanwinkle/6314903 to your computer and use it in GitHub Desktop.
Save justinvanwinkle/6314903 to your computer and use it in GitHub Desktop.
from __future__ import division
import random
import sys
import re
from math import floor
from math import ceil
from collections import defaultdict
_tokenize = re.compile(r'((?:\w{1,20}\'\w+)|(?:\w{1,20})|(?:[.,]))')
def ngrams(tokens):
print tokens
n_tokens = len(tokens)
for i in xrange(n_tokens):
for j in xrange(i + 2, min(n_tokens, i + 6) + 1):
print tuple(tokens[i:j])
yield tuple(tokens[i:j])
def tokenize(s):
return _tokenize.findall(s)
splits = {
6: 3,
5: 3,
4: 2,
3: 2,
2: 1}
class Markov(object):
def __init__(self):
self.grams = defaultdict(list)
def add_text(self, s):
for gram in ngrams(tokenize(s)):
split = splits[len(gram)]
self.grams[gram[:split]].append(gram[split:])
def get_next(self, current):
possibilities = []
for gram_size in range(3):
test_gram = current[:-gram_size] if gram_size > 0 else current
if test_gram in self.grams:
possibilities.extend(self.grams[test_gram])
for gram in self.grams[test_gram]:
if random.random() > .2:
return gram
if not possibilities:
return None
next_part = random.choice(possibilities)
return next_part
def make_ngram(self, seed=None, max_len=20):
if seed is None:
part = random.choice(self.grams.keys())
poem = []
while '.' not in part:
poem.extend(part)
part = self.get_next(tuple(poem[-3:]))
if part is None:
break
if part:
poem.extend(part)
if '.' in poem:
return poem[:poem.index('.')][:max_len]
return poem[:max_len]
def main():
text = sys.stdin.read()
m = Markov()
for line in text.splitlines():
m.add_text(line)
print ' '.join(m.make_ngram())
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment