Skip to content

Instantly share code, notes, and snippets.

@microamp
Created March 26, 2021 05:36
Show Gist options
  • Save microamp/5ab2feaaf06802ecebdd6fbf8071940e to your computer and use it in GitHub Desktop.
Save microamp/5ab2feaaf06802ecebdd6fbf8071940e to your computer and use it in GitHub Desktop.
A super simple implementation of a Markov chain
import collections
import random
import sys
def iter_words(rows):
    """Return a lazy iterator over every word in ``rows``, in order.

    Args:
        rows: Iterable of strings, for example the lines of a text file.

    Returns:
        A generator of non-empty words with no surrounding whitespace.
    """
    # ``str.split()`` with no separator splits on any run of whitespace and
    # never produces empty strings. Splitting on a literal " " left words
    # joined by tabs fused together and needed a separate strip/filter pass.
    return (word for row in rows for word in row.split())
def iter_partitions(ss, size):
    """Yield every run of ``size`` consecutive items from ``ss`` as a list.

    Windows overlap and advance by one item at a time, so an input of
    ``n`` items produces ``n - size + 1`` windows.

    Args:
        ss: Iterable of items. It is consumed lazily and only once.
        size: Number of items per window; must be at least 1.

    Yields:
        Lists of exactly ``size`` items. Nothing is yielded when ``ss``
        holds fewer than ``size`` items.

    Raises:
        ValueError: If ``size`` is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    if size < 1:
        raise ValueError("size must be at least 1, got {!r}".format(size))
    # A bounded deque drops the oldest item on each append, so only ``size``
    # items are held at any moment instead of a copy of the whole input.
    window = collections.deque(maxlen=size)
    for item in ss:
        window.append(item)
        if len(window) == size:
            # Copy so the caller's list is not affected by later appends.
            yield list(window)
def build_chain(partitions, prefix_len):
    """Map each prefix to the list of words observed right after it.

    Args:
        partitions: Iterable of non-empty word sequences. The first
            ``prefix_len`` items of each sequence form the prefix and the
            last item is recorded as its successor.
        prefix_len: Number of leading words that make up a prefix key.

    Returns:
        A ``collections.defaultdict(list)`` keyed by prefix tuples.
        Successors are appended in input order and repeats are kept, so a
        word that followed a prefix more often appears more often in its
        list. Because the result is a ``defaultdict``, indexing a missing
        key inserts an empty list; use ``in`` or ``.get`` to probe it.
    """
    chain = collections.defaultdict(list)
    for partition in partitions:
        # Tuples are hashable, which lets the prefix serve as a dict key.
        key = tuple(partition[:prefix_len])
        chain[key].append(partition[-1])
    return chain
def generate(chain, prefix):
    """Yield the words of ``prefix`` followed by randomly chosen successors.

    After each generated word the prefix slides forward by one word.
    Generation stops once the last word of the current prefix ends with
    ".", or when the current prefix has no recorded successors. If neither
    condition is ever reached the generator does not terminate on its own,
    so callers that need a bound should limit it themselves.

    Args:
        chain: Mapping from prefix tuples to lists of candidate next words.
            It is only read, never modified.
        prefix: Sequence of starting words; any iterable of strings is
            accepted and converted to a tuple.

    Yields:
        The prefix words first, then each generated word.
    """
    # Normalise so that list prefixes work as dict keys and can be
    # concatenated with a tuple below.
    prefix = tuple(prefix)
    yield from prefix
    if not prefix:
        # There is no last word to test and no key to look up.
        return
    while not prefix[-1].endswith("."):
        # ``.get`` avoids inserting an empty list into a defaultdict chain
        # and avoids KeyError on a plain dict.
        candidates = chain.get(prefix)
        if not candidates:
            # Dead end: this prefix was never followed by anything.
            return
        selected = random.choice(candidates)
        yield selected
        prefix = prefix[1:] + (selected,)
# Corpus location: the first command-line argument when one is given,
# otherwise the original hard-coded path.
filename = (
    sys.argv[1]
    if len(sys.argv) > 1
    else "/home/microamp/src/github.com/microamp/microq/cmd/poetry-archive/poems.txt"
)
prefix_len = 2

# An explicit encoding keeps decoding independent of the platform locale.
# NOTE(review): assumes the corpus is UTF-8 text -- confirm for other inputs.
# All lines are read up front, so the file can be closed before processing.
with open(filename, mode="r", encoding="utf-8") as f:
    words = iter_words(f.readlines())

# Each window is ``prefix_len`` prefix words plus the one word that follows.
partitions = iter_partitions(words, prefix_len + 1)
chain = build_chain(partitions, prefix_len)

prefixes = (("through", "the"),)
seed = random.choice(prefixes)
if seed not in chain:
    # Fail with a clear message instead of an error from inside generation.
    raise SystemExit(
        "starting prefix {!r} does not occur in {}".format(seed, filename)
    )

generated = generate(chain, seed)
# Print words as they are produced, separated by spaces, then end the line.
for word in generated:
    print(word, end=" ")
print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment