Last active
August 29, 2015 14:06
-
-
Save ACEfanatic02/947e877b5951515e287a to your computer and use it in GitHub Desktop.
Simple Markov chain generator. Based partially on: https://golang.org/doc/codewalk/markov/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import random | |
from collections import defaultdict | |
class MarkovPrefix(object):
    """Fixed-length sliding window of the most recently seen tokens.

    A new prefix starts out as ``n`` empty strings; each call to
    :meth:`shift` drops the oldest token and appends the newest one, so the
    window length never changes.
    """

    def __init__(self, n):
        """Create a prefix holding ``n`` empty-string tokens.

        Raises:
            ValueError: if ``n`` is smaller than 1.
        """
        if n < 1:
            raise ValueError("MarkovPrefix requires a length of at least 1")
        # List repetition instead of an ``xrange`` loop: ``xrange`` only
        # exists on Python 2, this form runs on Python 2 and 3 alike.
        self.tokens = [""] * n

    def shift(self, token):
        """Drop the oldest token and append ``token`` as the newest one."""
        # Build a new list rather than mutating in place, so any outside
        # reference to the previous ``tokens`` list is left untouched.
        self.tokens = self.tokens[1:] + [token]

    def __str__(self):
        """Return the tokens joined by single spaces."""
        return " ".join(self.tokens)
class MarkovChain(object):
    """Order-``n`` Markov chain over whitespace-separated tokens.

    ``chain`` maps a prefix state (the last ``n`` tokens joined by single
    spaces) to the list of tokens observed right after that state. A token
    seen several times after the same state is stored several times, so
    ``random.choice`` picks followers in proportion to how often they
    occurred.
    """

    def __init__(self, n):
        """Create an empty chain whose prefix states are ``n`` tokens long."""
        self.chain = defaultdict(list)
        self.n = n

    @staticmethod
    def _state_key(prefix):
        """Return the dictionary key for ``prefix``: its tokens, space-joined.

        Joining the tokens directly yields the same text as the prefix's
        string form, but avoids the Python 2-only ``unicode`` builtin and
        therefore works on Python 2 and 3 alike.
        """
        return " ".join(prefix.tokens)

    def feed(self, text):
        """Feed the chain a new chunk.

        Text should be chunked into logical sections -- at least one sentence.
        The prefix is reset on each call. Chains will not (directly) continue
        across `feed()` calls.
        """
        prefix = MarkovPrefix(self.n)
        for token in text.split():
            self.chain[self._state_key(prefix)].append(token)
            prefix.shift(token)

    def generate(self, n):
        """Generate a block of text from the chain.

        Returns a block of text with at most `n` tokens, joined by single
        spaces. May return less than `n` tokens if a null state (a prefix
        with no recorded followers) is reached.
        """
        prefix = MarkovPrefix(self.n)
        words = []
        for _ in range(n):
            # ``.get`` instead of indexing: indexing a defaultdict would
            # insert an empty list for every dead-end state, so merely
            # generating text would grow the model.
            choices = self.chain.get(self._state_key(prefix))
            if not choices:
                break  # Null state
            token = random.choice(choices)
            words.append(token)
            prefix.shift(token)
        return " ".join(words)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment