Skip to content

Instantly share code, notes, and snippets.

@jbg
Last active October 1, 2016 13:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jbg/77c1ef976a4379a3d7f20bcba6f48653 to your computer and use it in GitHub Desktop.
Save jbg/77c1ef976a4379a3d7f20bcba6f48653 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import argparse
from collections import Counter, defaultdict
from random import random, randint
from time import sleep
# Command-line interface: corpus file, number of characters to emit, and
# the model order (how many preceding characters form a context).
parser = argparse.ArgumentParser()
parser.add_argument("corpus", help="a filename for training")
parser.add_argument("length", type=int, help="how many characters to generate")
parser.add_argument("--order", type=int, help="the order to train with", default=8)
args = parser.parse_args()

# Read the whole corpus inside a context manager so the file handle is
# closed as soon as the text is in memory.  Carriage returns are dropped and
# each newline becomes ". " so the text is one continuous line.
with open(args.corpus, "r") as corpus_file:
    training = corpus_file.read().replace("\r", "").replace("\n", ". ")

# Prefix the text with `order` pad characters so that the very first real
# characters also have a full-length context to be counted under.
pad = "~" * args.order
training = pad + training

# lm maps every `order`-character context seen in the corpus to a Counter of
# the characters that immediately followed it.
lm = defaultdict(Counter)
for i in range(len(training) - args.order):
    history, char = training[i:i + args.order], training[i + args.order]
    lm[history][char] += 1
def normalise(counter):
    """Turn a mapping of raw counts into relative frequencies.

    Args:
        counter: Mapping from item to its count (e.g. a ``Counter``).

    Returns:
        A list of ``(item, fraction)`` pairs, in the mapping's iteration
        order, where ``fraction`` is the item's count divided by the total
        of all counts.
    """
    total = float(sum(counter.values()))
    fractions = []
    for item, count in counter.items():
        fractions.append((item, count / total))
    return fractions
# For every context in lm, pre-compute the normalised distribution over the
# characters that followed it: context -> [(char, probability), ...].
training_results = dict(zip(lm.keys(), map(normalise, lm.values())))
def generate_letter(history, model=None, order=None):
    """Sample the next character given the text generated so far.

    Only the last ``order`` characters of *history* are used as the lookup
    context.  The character is drawn from the normalised distribution for
    that context, so each candidate is picked with probability equal to its
    relative frequency after that context in the training text.

    Args:
        history: The text generated so far (initially the ``~`` padding).
        model: Mapping from context string to a list of
            ``(char, probability)`` pairs.  Defaults to the module-level
            ``training_results``.
        order: Context length in characters.  Defaults to ``args.order``.

    Returns:
        The sampled character, or ``None`` when the context has no recorded
        distribution (it was never followed by anything in training).
    """
    if model is None:
        model = training_results
    if order is None:
        order = args.order

    # history[-0:] is the whole string, so order 0 needs an explicit "".
    context = history[-order:] if order > 0 else ""

    # .get() keeps unseen contexts from being inserted into the model.
    dist = model.get(context)
    if not dist:
        return None

    # Inverse-CDF sampling: walk the candidates, subtracting each
    # probability from a uniform draw until the draw is used up.
    x = random()
    for c, p in dist:
        x -= p
        if x <= 0:
            return c

    # Floating-point rounding can leave the probabilities summing to just
    # under the draw; attribute that leftover sliver to the last candidate.
    return dist[-1][0]
# Generation starts from the same all-pad context that prefixes the
# training text.
start = "~" * args.order
history = start
out = []
while len(out) < args.length:
    c = generate_letter(history)
    if c is None:
        # Dead end: this context was never followed by anything in the
        # corpus (e.g. the corpus's unique final characters).  Restart from
        # the pad context; if even that has no continuation, the model is
        # empty and nothing more can be generated.
        if history == start:
            break
        history = start
        continue
    # Keep only the last `order` characters so the context stays bounded;
    # [-0:] would keep everything, hence the explicit order-0 case.
    history = (history + c)[-args.order:] if args.order > 0 else ""
    out.append(c)
print("".join(out))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment