@joe-sullivan
Created April 5, 2017 12:41
#!/usr/bin/env python3
from collections import defaultdict, Counter
from random import random


def train_char_lm(fname, order=4):
    # Count how often each character follows every `order`-length history.
    with open(fname, 'r') as f:
        data = f.read()
    lm = defaultdict(Counter)
    pad = '~' * order
    data = pad + data  # pad the start so the first real characters have a history
    for i in range(len(data) - order):
        history, char = data[i:i+order], data[i+order]
        lm[history][char] += 1

    def normalize(counter):
        # Convert raw counts to a list of (char, probability) pairs.
        s = float(sum(counter.values()))
        return [(c, cnt / s) for c, cnt in counter.items()]

    outlm = {hist: normalize(chars) for hist, chars in lm.items()}
    return outlm


def generate_letter(lm, history, order):
    # Sample the next character from the distribution for the last `order` characters.
    history = history[-order:]
    dist = lm[history]
    x = random()
    for c, v in dist:
        x = x - v
        if x <= 0:
            return c
    return c  # guard against floating-point rounding leaving x slightly above 0


def generate_text(lm, order, nletters=1000):
    history = '~' * order  # start from the same padding used during training
    out = []
    for _ in range(nletters):
        c = generate_letter(lm, history, order)
        history = history[-order:] + c
        out.append(c)
    return ''.join(out)


if __name__ == '__main__':
    order = 4
    lm = train_char_lm('input.txt', order=order)
    print(generate_text(lm, order))
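
A quick way to exercise the script from a REPL (a sketch: 'shakespeare.txt' is a placeholder for any plain-text corpus, and order=7 is just an illustrative choice; longer orders produce more coherent output but memorize more of the source):

lm = train_char_lm('shakespeare.txt', order=7)  # any UTF-8 text file works here
print(generate_text(lm, order=7, nletters=500))  # must generate with the same order used in training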
woctezuma commented Mar 7, 2019

Thanks for the code, but this is not an RNN. It is a character-level n-gram model, with no RNN involved.
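
Indeed, the script above is a fixed-order Markov chain: every key in lm is a history of exactly `order` characters, and nothing outside that window affects the next character, whereas an RNN carries a hidden state that can summarize longer context. A quick check, a sketch assuming the order=4 model trained above:

lm = train_char_lm('input.txt', order=4)
print(all(len(h) == 4 for h in lm))  # True: every history is exactly 4 characters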
