Skip to content

Instantly share code, notes, and snippets.

@jtauber

jtauber/markov_greek.py Secret

Created Aug 7, 2017
Embed
What would you like to do?
#!/usr/bin/env python3
import collections
import random
from greek_accentuation.characters import strip_accents
from greek_accentuation.syllabify import syllabify, rebreath
from greek_accentuation.accentuation import possible_accentuations, add_accent
# real word list (one word per line)
FILENAME = "sblgnt-words.txt"

# build bigram and trigram models from words in file
#   bigram[a][b]   counts how often character b directly follows character a
#   trigram[ab][c] counts how often character c directly follows the pair ab
# Every word is padded with one space on each side, so " " acts as both the
# start-of-word and the end-of-word marker.
bigram = collections.defaultdict(lambda: collections.defaultdict(int))
trigram = collections.defaultdict(lambda: collections.defaultdict(int))

# The word list is Greek text (assumed to be stored as UTF-8), so name the
# encoding explicitly instead of relying on the platform default.
with open(FILENAME, encoding="utf-8") as f:
    for line in f:
        word = line.strip()
        if not word:
            # A blank line would record a " " -> " " transition, i.e. an
            # empty word, and generate_nonce() would then index into an
            # empty string.
            continue
        # normalise: lower-case, strip accents, fold final sigma (ς) into σ
        w = " " + strip_accents(word.lower()).replace("ς", "σ") + " "
        for i in range(len(w) - 1):
            bigram[w[i]][w[i + 1]] += 1
            # the last bigram (letter + closing space) has no third character
            if i < len(w) - 2:
                trigram[w[i:i + 2]][w[i + 2]] += 1
# generate a nonce word, randomly accenting it according to law of limitation
def generate_nonce():
    """Return one randomly generated, accented nonce word.

    Letters are drawn from the module-level ``bigram`` and ``trigram``
    models: the first letter is weighted by how often it follows the
    start-of-word marker, and each later character is weighted by how often
    it follows the previous two characters, until the end-of-word marker
    (a space) is drawn.  The result is then given a final sigma where
    needed, passed through ``rebreath``, and accented with one randomly
    chosen option from ``possible_accentuations``.

    Raises:
        ValueError: if the bigram model contains no word-initial letters.
    """
    # " " doubles as the end-of-word marker.  If the model ever recorded a
    # " " -> " " transition (an empty word in the input), drawing it here
    # would produce an empty nonce and crash below, so exclude it.
    initials = {ch: n for ch, n in bigram[" "].items() if ch != " "}
    if not initials:
        raise ValueError("bigram model has no word-initial letters")

    first = random.choices(list(initials), weights=list(initials.values()))[0]
    nonce = " " + first
    while not nonce.endswith(" "):
        followers = trigram[nonce[-2:]]
        nonce += random.choices(
            list(followers), weights=list(followers.values())
        )[0]
    nonce = nonce.strip()

    # final sigma
    if nonce.endswith("σ"):
        nonce = nonce[:-1] + "ς"

    # smooth breathing unless υ
    # NOTE(review): the "h" prefix is presumably how rebreath() is asked for
    # a rough breathing -- confirm against the greek_accentuation docs.
    if nonce.startswith("υ"):
        nonce = "h" + nonce
    nonce = rebreath(nonce)

    # randomly accent
    s = syllabify(nonce)
    return random.choice([
        add_accent(s, accent_class)
        for accent_class in possible_accentuations(s)
    ])
# emit twenty freshly generated nonce words, one per line
for _ in range(20):
    print(generate_nonce())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.