Last active
March 20, 2021 01:54
-
-
Save nic-hartley/148956b58ca8a2ec9f231719a5143cf1 to your computer and use it in GitHub Desktop.
Englishish generator
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
englishish.py is a quick-and-dirty prototype of a project I plan to work on | |
more later: A generator for text which obeys English spelling and grammar | |
rules, but whose output is nonsensical. The output of this script does tend | |
to make sense, but that's a product of the dictionary. If it was larger, it | |
would generate sentences which are grammatical but not sensical. | |
The next step is implementing this with a bigger dictionary, cleaner code, and | |
probably in Rust as a C-compatible library. I plan to use it in a little art | |
game about Borges' Library of Babel -- to generate the contents of the books. | |
Once it's implemented in Rust, it'll probably be FOSS'd as a separate repo. | |
""" | |
from collections import namedtuple | |
import random | |
def det_a_an(next):
    """Pick the indefinite article ('a' or 'an') for the word that follows.

    The choice is made purely from the spelling of the first letter, so
    words whose pronunciation disagrees with their spelling ("hour",
    "unicorn") get the wrong article.

    Args:
        next: the word that will come directly after the article.  The name
            shadows the builtin, but is kept so keyword callers still work.

    Returns:
        'an' if ``next`` starts with a, e, i, o or u (in either case),
        otherwise 'a' -- including when ``next`` is empty.
    """
    # not ideal but close enough
    # Slicing instead of indexing tolerates an empty word, and lowercasing
    # makes capitalised words ("Otter") behave like their lowercase form.
    return 'an' if next[:1].lower() in {'a', 'e', 'i', 'o', 'u'} else 'a'
def det_thy_thine(next):
    """Pick the archaic second-person possessive for the word that follows.

    Like the a/an choice, this looks only at the spelling of the first
    letter of the following word.

    Args:
        next: the word that will come directly after the possessive.  The
            name shadows the builtin, but is kept so keyword callers still
            work.

    Returns:
        'thine' if ``next`` starts with a, e, i, o or u (in either case),
        otherwise 'thy' -- including when ``next`` is empty.
    """
    # Slicing instead of indexing tolerates an empty word, and lowercasing
    # makes capitalised words behave like their lowercase form.
    return 'thine' if next[:1].lower() in {'a', 'e', 'i', 'o', 'u'} else 'thy'
# Word lists for every terminal symbol of the grammar.
#
# Key prefixes: '1.' marks singular forms and 'M.' plural forms; 'pos' marks
# possessives ('nom' is presumably "nominative" -- the plain, non-possessive
# form).  Each entry is either a literal word or a callable that receives the
# word following it in the sentence and returns the word to use.
TERMINALS = {
    'article': ['the', det_a_an],
    # Plural nouns cannot take 'a'/'an', so plural noun phrases draw their
    # article from this list instead of 'article'.
    'M.article': ['the'],
    'of': ['of'],
    'conjunction': ['and', 'or'],
    'adverb': ['widely', 'angrily', 'cutely', 'really'],
    'adjective': ['wide', 'angry', 'cute'],
    '1.nom.noun': ['cat', 'dog', 'keyboard', 'otter'],
    'M.nom.noun': ['cats', 'dogs', 'keyboards', 'otters'],
    '1.pos.noun': ["cat's", "dog's", "keyboard's", "otter's"],
    'M.pos.noun': ["cats'", "dogs'", "keyboards'", "otters'"],
    '1.nom.pron': ['I', 'thou', 'he', 'she', 'they'],
    'M.nom.pron': ['we', "y'all", 'they'],
    'pos.pron': [
        # singular
        'my', det_thy_thine, 'his', 'her', 'their',
        # plural
        'our', "y'all's", 'their',
    ],
    'M.verb': ['like', 'hug', 'poke'],
    '1.verb': ['likes', 'hugs', 'pokes'],
}

# Production rules for every non-terminal symbol.
#
# Each rule is a list of (weight, expansion) pairs: the weight is the relative
# chance of that expansion being picked, and the expansion is the sequence of
# symbols (terminal or non-terminal) that replaces the rule's name.
OPTIONS = {
    'sentence': [
        (3, ['1.noun-phrase', '1.verb-phrase', '1.noun-phrase']),
        (1, ['M.noun-phrase', 'M.verb-phrase', '1.noun-phrase']),
        (3, ['1.noun-phrase', '1.verb-phrase', 'M.noun-phrase']),
        (1, ['M.noun-phrase', 'M.verb-phrase', 'M.noun-phrase']),
    ],
    '1.noun-phrase': [
        (3, ['article', '1.described-noun']),
        (1, ['pos.pron', '1.described-noun']),
    ],
    '1.described-noun': [
        (8, ['1.nom.noun']),
        (3, ['adjective', '1.nom.noun']),
        (2, ['1.pos.noun', '1.nom.noun']),
        (1, ['M.pos.noun', '1.nom.noun']),
        (1, ['adverb', 'adjective', '1.described-noun']),
    ],
    'M.noun-phrase': [
        # 'M.article' rather than 'article': "a cats" is not grammatical.
        (12, ['M.article', 'M.described-noun']),
        (4, ['pos.pron', 'M.described-noun']),
        (12, ['M.described-noun']),
        # Conjoined phrases live here, not under 'M.described-noun', because
        # each side already carries its own determiner; nesting them below a
        # determiner or modifier gave "the the cat and a dog".  Together they
        # account for 4 of the 32 weight units, i.e. one plural phrase in 8.
        (1, ['1.noun-phrase', 'conjunction', '1.noun-phrase']),
        (1, ['M.noun-phrase', 'conjunction', '1.noun-phrase']),
        (1, ['1.noun-phrase', 'conjunction', 'M.noun-phrase']),
        (1, ['M.noun-phrase', 'conjunction', 'M.noun-phrase']),
    ],
    'M.described-noun': [
        (16, ['M.nom.noun']),
        (6, ['adjective', 'M.nom.noun']),
        (4, ['1.pos.noun', 'M.nom.noun']),
        (4, ['M.pos.noun', 'M.nom.noun']),
        # Mirrors the singular rule: an adverb must modify an adjective, not
        # the noun directly ("really cute cats", never "really cats").
        (2, ['adverb', 'adjective', 'M.described-noun']),
    ],
    '1.verb-phrase': [
        (4, ['1.verb']),
        (1, ['adverb', '1.verb-phrase']),
    ],
    'M.verb-phrase': [
        (4, ['M.verb']),
        (1, ['adverb', 'M.verb-phrase']),
    ],
}
def weighted_choose_child(name):
    """Randomly pick one expansion of the non-terminal ``name``.

    Looks up the (weight, expansion) pairs registered for ``name`` in OPTIONS
    and selects one with probability proportional to its weight.

    Args:
        name: key of the rule in OPTIONS to expand.

    Returns:
        The chosen expansion: the list of symbol names stored in OPTIONS.

    Raises:
        KeyError: if ``name`` has no entry in OPTIONS.
        ValueError: if the roll is not covered by any weight band.
    """
    candidates = OPTIONS[name]
    roll = random.randrange(sum(entry[0] for entry in candidates))
    # Walk the weight bands in order; the roll belongs to the first band
    # whose cumulative upper bound lies above it.
    upper_bound = 0
    for weight, components in candidates:
        upper_bound += weight
        if roll < upper_bound:
            return components
    raise ValueError("what")
def expand_options(root):
    """Expand ``root`` into a flat, left-to-right list of terminal names.

    Non-terminals are repeatedly replaced by a randomly chosen expansion
    until only keys of TERMINALS remain.

    Args:
        root: the symbol to start from (e.g. 'sentence').

    Returns:
        A list of TERMINALS keys in sentence order.
    """
    pending = [root]
    leaves = []
    while pending:
        symbol = pending.pop()
        if symbol not in TERMINALS:
            # Push the expansion reversed so its first symbol is popped next,
            # which keeps the output in left-to-right order.
            pending.extend(reversed(weighted_choose_child(symbol)))
            continue
        leaves.append(symbol)
    return leaves
def generate(terminals):
    """Turn a sequence of terminal names into a space-separated sentence.

    Words are chosen from last to first, because some TERMINALS entries are
    callables that need to see the word that follows them (for example to
    decide between 'a' and 'an').

    Args:
        terminals: sequence of TERMINALS keys in sentence order.

    Returns:
        The chosen words joined by single spaces ('' for an empty sequence).

    Raises:
        KeyError: if a name is not a key of TERMINALS.
        ValueError: if a context-dependent entry is picked for the last
            position (there is no following word to inspect), or if an entry
            is neither a string nor a callable.
    """
    # Built back-to-front with append (O(1)) and flipped once at the end,
    # instead of inserting at index 0 on every step.
    words_reversed = []
    for term in reversed(terminals):
        val = random.choice(TERMINALS[term])
        if isinstance(val, str):
            word = val
        elif callable(val):
            if not words_reversed:
                raise ValueError(
                    "terminal {!r} depends on the following word, but "
                    "nothing follows it".format(term)
                )
            # The most recently chosen word is the one that follows this one.
            word = val(words_reversed[-1])
        else:
            raise ValueError(
                "entry {!r} for terminal {!r} is neither a string nor a "
                "callable".format(val, term)
            )
        words_reversed.append(word)
    words_reversed.reverse()
    return ' '.join(words_reversed)
if __name__ == '__main__':
    # Expand the grammar from its start symbol, then fill in concrete words.
    skeleton = expand_options('sentence')
    print(generate(skeleton))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment