Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Very simple second-order Markov text generator. Train on whatever sources you want.
import random
# When True, both sources are truncated to the same number of words before
# they are combined.
normalizeLengths = True

# Characters kept in the training text; every other character is dropped.
A = "ABCDEFGHIJKLMNOPQRSTUVWXYZ "
charnums = len(A)  # Alphabet size; not referenced by the rest of the visible script.


def _read_words(path):
    """Return the whitespace-separated tokens of the text file at *path*."""
    # The context manager closes the handle as soon as the text is read,
    # instead of leaving it open for the lifetime of the process.
    with open(path, 'r') as source:
        # split() with no arguments also splits on line breaks, so the file
        # does not need to be re-joined line by line first.
        return source.read().split()


# Read both training sources as lists of words.
trainingText = _read_words("KingJamesBible.txt")
trainingText2 = _read_words("GriffithsQuantumMechanics.txt")

if normalizeLengths:
    shorterSize = min(len(trainingText), len(trainingText2))
    trainingText = trainingText[:shorterSize]
    trainingText2 = trainingText2[:shorterSize]

# One space between words (removes double spaces) and everything upper-cased.
trainingText = " ".join(trainingText + trainingText2).upper()

# Remove everything not in the alphabet. A single join avoids rebuilding the
# whole corpus string once per character.
newtrainingText = "".join(ch for ch in trainingText if ch in A)
trainingText = newtrainingText.split()  # List of all-caps words
# Building transition dictionary.
# d maps a "WORD1 WORD2" key to the list of distinct words that were seen
# immediately after that pair, in order of first appearance.
d = {}
# Zipping the word list against itself shifted by one and two positions yields
# every consecutive (first, second, following) triple; this works on both
# Python 2 and Python 3, unlike xrange.
for first, second, following in zip(trainingText, trainingText[1:], trainingText[2:]):
    # setdefault creates the successor list the first time a pair is seen,
    # so the key is built once and no KeyError handling is needed.
    successors = d.setdefault(first + " " + second, [])
    if following not in successors:  # each successor is stored only once
        successors.append(following)
# Starting words
# random.choice needs an indexable sequence; on Python 3 d.keys() is a view,
# so materialize the keys once and reuse the list for any restarts below.
stateKeys = list(d)
if not stateKeys:
    raise SystemExit("Transition table is empty; nothing to generate from.")
text = random.choice(stateKeys).split()  # This is an array of words
# Alternative fixed seed:
# text = "ACCORDING TO ALL KNOWN LAWS OF AVIATION".split()
numWords = 1000
targetLength = len(text) + numWords  # seed words plus numWords generated words
while len(text) < targetLength:
    key = ' '.join(text[-2:])
    followers = d.get(key)
    if followers:
        text.append(random.choice(followers))
    else:
        # Dead end: this pair has no recorded successor (e.g. it only occurs
        # at the very end of the training text, or it came from a hand-written
        # seed). Restart from a random known pair instead of raising KeyError;
        # appending both of its words guarantees the next lookup succeeds.
        text.extend(random.choice(stateKeys).split())
# A restart adds two words at once and can overshoot by one; trim so the
# output length is always the seed length plus numWords.
del text[targetLength:]
print(' '.join(text).lower())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment