Skip to content

Instantly share code, notes, and snippets.

@TheBlackParrot
Created March 23, 2016 02:33
Show Gist options
  • Save TheBlackParrot/80b52b37b9775e8e6fa7 to your computer and use it in GitHub Desktop.
Save TheBlackParrot/80b52b37b9775e8e6fa7 to your computer and use it in GitHub Desktop.
import random;
# Path of the plain-text corpus file that the script reads, line by line,
# when it runs.
CORPUS_FILENAME = "/home/theblackparrot/corpus.txt"

# Transition table of the Markov model: each key is a two-word phrase
# ("w1 w2") and each value is the list of words seen directly after that
# phrase in the corpus (duplicates kept, so frequent followers are picked
# more often).
corpus = dict()
def addToCorpus(line, table=None):
    """Record the word transitions found in one line of text.

    The line is split on whitespace.  Every pair of adjacent words becomes a
    key of the form ``"w1 w2"``, and the word that follows the pair (if any)
    is appended to that key's list.  The last pair of a line has no follower
    but still gets an (empty) entry, so a chain can always look it up and
    stop there.

    Args:
        line: Raw text line; surrounding whitespace and newlines are ignored.
        table: Dictionary to update.  Defaults to the module-level ``corpus``
            so existing single-argument calls behave as before.

    Returns:
        None.  ``table`` is modified in place.  Lines with fewer than two
        words add nothing.
    """
    if table is None:
        table = corpus

    words = line.split()

    # Pair every two-word window with the word after it.  The trailing None
    # marks the final window, which has no follower.  With fewer than two
    # words ``words[1:]`` is empty, so the loop body never runs.
    followers = words[2:] + [None]
    for first, second, follower in zip(words, words[1:], followers):
        seen = table.setdefault(first + " " + second, [])
        if follower is not None:
            seen.append(follower)
def generateMarkovChain(table=None, max_length=1000):
    """Generate text by randomly walking the two-word transition table.

    A starting phrase is picked at random from the table's keys.  While the
    current phrase has recorded followers and the output is still shorter
    than ``max_length`` characters, one follower is chosen at random,
    appended to the output, and the phrase slides forward by one word.

    Args:
        table: Mapping of ``"w1 w2"`` phrases to lists of follower words.
            Defaults to the module-level ``corpus`` so existing no-argument
            calls behave as before.
        max_length: Generation stops once the output has reached this many
            characters.  The limit is checked before each word is added, so
            the result may exceed it by the last word appended.

    Returns:
        The generated text, or an empty string when the table is empty.
    """
    if table is None:
        table = corpus

    # random.choice raises IndexError on an empty sequence; an empty model
    # simply produces no text.
    if not table:
        return ""

    phrase = random.choice(list(table))
    pieces = [phrase]
    # Track the output length incrementally instead of rebuilding the string
    # on every step.
    length = len(phrase)

    while length < max_length:
        followers = table.get(phrase)
        if not followers:
            # Unknown phrase or a phrase that only ever ended a line.
            break
        nextWord = random.choice(followers)
        pieces.append(nextWord)
        length += 1 + len(nextWord)  # one separating space plus the word
        phrase = phrase.split()[1] + " " + nextWord

    return " ".join(pieces).strip()
# Feed every non-empty line of the corpus file into the transition table,
# then print a single generated chain.
with open(CORPUS_FILENAME, 'r') as CORPUS_FILE:
    for line in filter(None, CORPUS_FILE):
        addToCorpus(line)

print(generateMarkovChain())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment