Skip to content

Instantly share code, notes, and snippets.

@prozacgod
Created April 22, 2015 21:55
Show Gist options
  • Save prozacgod/5848c38357b7baa95ff9 to your computer and use it in GitHub Desktop.
Save prozacgod/5848c38357b7baa95ff9 to your computer and use it in GitHub Desktop.
import random
# Expects a file named "allwords.txt" in the current directory, containing one word per line in all caps.
# Load the word list. The context manager closes the file promptly, and
# blank entries (e.g. the empty string that a trailing newline leaves after
# splitting) are dropped because an empty string is not a usable word.
with open("allwords.txt", 'r') as wordFile:
    data = [s.strip() for s in wordFile.read().split("\n") if s.strip()]
def weightedRandom(weightedSet):
    """Pick a key at random, with probability proportional to its weight.

    Args:
        weightedSet: Mapping of candidate key -> non-negative integer weight
            (the callers in this file pass occurrence counts).

    Returns:
        One of the keys of ``weightedSet``.

    Raises:
        ValueError: If the mapping is empty or its weights sum to less
            than 1, since there is nothing to draw from.
    """
    total = sum(weightedSet.values())
    if total < 1:
        raise ValueError("weightedRandom() needs a positive total weight")
    value = random.randint(1, total)
    # sorted() accepts both a Python 2 key list and a Python 3 dict view,
    # whereas calling .sort() on .keys() only works on Python 2. Walking the
    # keys in a fixed order keeps the draw reproducible for a given seed.
    for k in sorted(weightedSet):
        value -= weightedSet[k]
        if value <= 0:
            return k
class Markov():
    """First-order, character-level Markov chain used to generate words.

    Words are fed in with ``digestStr``; ``generate`` then builds a new
    string one character at a time, choosing each next character according
    to how often it followed the previous one in the digested words.
    """

    def __init__(self):
        # data[a][b]: how many times character b directly followed character a.
        self.data = {}
        # startCount[c]: how many digested words started with character c.
        self.startCount = {}
        # startLengths[c][n]: how many digested words starting with c had length n.
        self.startLengths = {}
        # Sentinels; replaced by real lengths once a word has been digested.
        self.minLength = 9999999999
        self.maxLength = -9999999999

    def digest(self, a, b):
        """Record one observation of ``b`` directly following ``a``."""
        followers = self.data.setdefault(a, {})
        followers[b] = followers.get(b, 0) + 1

    def digestStr(self, data):
        """Add one word to the model.

        Updates the min/max length, the starting-character count, the
        per-starting-character length histogram, and the transition counts
        for every adjacent pair of characters.

        Args:
            data: The word to learn from. An empty word carries no
                information and is ignored (it would otherwise zero
                ``minLength`` and fail on ``data[0]``).
        """
        if not data:
            return
        size = len(data)
        self.minLength = min(self.minLength, size)
        self.maxLength = max(self.maxLength, size)

        start = data[0]
        self.startCount[start] = self.startCount.get(start, 0) + 1
        lengths = self.startLengths.setdefault(start, {})
        lengths[size] = lengths.get(size, 0) + 1

        # Pair each character with its successor.
        for prev, cur in zip(data, data[1:]):
            self.digest(prev, cur)

    def generate(self, first=None, length=None):
        """Generate a string from the digested statistics.

        Args:
            first: Optional prefix to start from. When missing (``None`` or
                empty) a starting character is drawn from ``startCount``.
            length: Optional target length. When ``None`` it is drawn from
                the lengths of digested words that share the result's
                first character.

        Returns:
            The generated string. It is shorter than the target length if
            the chain reaches a character with no recorded successor, and
            it is ``first`` unchanged if that is already long enough.

        Raises:
            KeyError: If ``length`` is ``None`` and no digested word starts
                with the result's first character.
            ValueError: If a starting character is needed but nothing has
                been digested.
        """
        result = first
        if not result:
            result = weightedRandom(self.startCount)
        target = length
        if target is None:
            # startLengths is keyed by a word's FIRST character, so look up
            # result[0]; result[-1] is only the same thing for a 1-char prefix.
            target = weightedRandom(self.startLengths[result[0]])
        while len(result) < target:
            followers = self.data.get(result[-1])
            if not followers:
                # Dead end: this character was never followed by anything.
                break
            result += weightedRandom(followers)
        return result
# Train a chain on every word in the list, then print 20 generated words.
f = Markov()
for s in data:
    f.digestStr(s)
for i in range(20):
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(f.generate())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment