Created
March 2, 2021 13:46
-
-
Save Anaphory/7e55a7834488164e26dfb75fc1442fd7 to your computer and use it in GitHub Desktop.
Small Python script to generate simple sentences for a synthetic language with minimal concatenative morphology
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bisect
import random
from collections import Counter

import numpy
class DiscreteProbabilities:
    """A discrete distribution over arbitrary outcomes, sampled by bisection.

    The outcomes are stored in ``keys`` (ordered by descending weight) and the
    matching normalised cumulative weights in ``values``; the last entry of
    ``values`` is therefore 1.0.
    """

    def __init__(self, ps):
        """Build the distribution from a mapping of outcome -> relative weight.

        Args:
            ps: Mapping from outcome to a non-negative weight. The weights do
                not need to sum to one; they are normalised here.

        Raises:
            ValueError: If ``ps`` is empty or its total weight is not positive.
        """
        if not ps:
            raise ValueError("DiscreteProbabilities needs at least one outcome")
        keys, values = zip(*sorted(ps.items(), key=lambda kv: kv[1], reverse=True))
        # Accumulate as floats: with integer weights an integer cumulative sum
        # cannot be normalised in place.
        cumulative = numpy.cumsum(values, dtype=float)
        total = cumulative[-1]
        # `not total > 0` also rejects NaN totals.
        if not total > 0:
            raise ValueError("DiscreteProbabilities needs a positive total weight")
        self.values = cumulative / total
        self.keys = keys

    def generate(self):
        """Draw one outcome at random, with probability proportional to its weight."""
        x = numpy.random.random()
        # x lies in [0, 1) and the last cumulative value is 1.0, so the
        # bisection index always points at a valid key.
        i = bisect.bisect(self.values, x)
        return self.keys[i]
def phonemes():
    """Generate the phoneme inventory of a simlang.

    Returns:
        A dict with two samplers: ``"c"`` for consonants and ``"v"`` for
        vowels. The consonant at 1-based position ``k`` in the list below gets
        relative weight ``1 / k``; the vowel weights are given literally.
    """
    consonants = (
        "p",
        "b",
        "t",
        "d",
        "c",
        "k",
        "g",
        "'",
        "f",
        "s",
        "h",
        "m",
        "n",
        "ŋ",
        "w",
        "l",
        "r",
        "j",
    )
    consonant_weights = {}
    for rank, consonant in enumerate(consonants, start=1):
        consonant_weights[consonant] = 1 / rank

    vowel_weights = {"a": 0.5, "ə": 0.33333333, "i": 0.25, "u": 0.2}

    return {
        "c": DiscreteProbabilities(consonant_weights),
        "v": DiscreteProbabilities(vowel_weights),
    }
def morphemes(inventory):
    """Generate the random morpheme lexicon of a simlang.

    Args:
        inventory: Mapping with a consonant sampler under ``"c"`` and a vowel
            sampler under ``"v"``; each must offer a ``generate()`` method.

    Returns:
        A dict of samplers keyed by morpheme class: ``"n"`` (nouns), ``"ng"``
        (noun affixes), ``"v"`` (verbs), ``"vg"`` (verb affixes), ``"o"``
        (other lexical items) and ``"og"`` (other grammatical items). Every
        form is a tuple of phonemes; nouns and verbs are CVCV, all other
        classes are CV.
    """

    def ranked_forms(syllables, draws):
        """Draw `draws` random forms of `syllables` CV pairs, weighting draw k by 1/k."""
        weights = {}
        for rank in range(1, draws + 1):
            form = ()
            for _ in range(syllables):
                form += (inventory["c"].generate(), inventory["v"].generate())
            # Forms are dict keys, so a repeated form does not add an entry;
            # it only overwrites the weight with the one of the latest draw.
            weights[form] = 1.0 / rank
        return DiscreteProbabilities(weights)

    # The classes are generated in exactly this order.
    return {
        "n": ranked_forms(2, 1000),
        "ng": ranked_forms(1, 10),
        "v": ranked_forms(2, 1000),
        "vg": ranked_forms(1, 10),
        "o": ranked_forms(1, 100),
        "og": ranked_forms(1, 100),
    }
def sentence(morphemes, g=None, l=None):
    """Generate one random sentence and tally the phonemes it used.

    A sentence consists of 3 to 9 words. Each word takes a stem from class
    ``"o"``, ``"n"`` or ``"v"`` (picked with odds 3:2:1) and, with probability
    one half, an affix from the matching ``"og"``/``"ng"``/``"vg"`` class
    appended directly after it. Every word is followed by a space and the
    sentence is closed with a full stop.

    Args:
        morphemes: Mapping from morpheme class to a sampler whose
            ``generate()`` returns a tuple of phoneme strings.
        g: Counter receiving the phonemes of the affixes. A fresh Counter is
            created when omitted, so separate calls no longer share state.
        l: Counter receiving the phonemes of the stems. A fresh Counter is
            created when omitted.

    Returns:
        A tuple ``(text, g, l)`` of the sentence string and the two counters
        (the ones passed in, updated in place, or the freshly created ones).
    """
    # Test against None explicitly: an empty Counter passed by the caller is
    # falsy but must still be the one that gets updated.
    if g is None:
        g = Counter()
    if l is None:
        l = Counter()
    parts = []
    for _ in range(numpy.random.randint(3, 10)):
        segment = ["o", "o", "o", "n", "n", "v"][numpy.random.randint(6)]
        form = morphemes[segment].generate()
        if numpy.random.random() < 0.5:
            morph = morphemes[segment + "g"].generate()
        else:
            morph = ()
        parts.extend(form)
        parts.extend(morph)
        parts.append(" ")
        # The counters are updated per phoneme, not per morpheme.
        g.update(morph)
        l.update(form)
    return "".join(parts) + ".", g, l


def main():
    """Print 130 random sentences, then the phoneme tallies and their total."""
    inventory = phonemes()
    lexicon = morphemes(inventory)
    # Accumulate over all sentences explicitly instead of relying on shared
    # default arguments of sentence().
    grammar_counts = Counter()
    lexical_counts = Counter()
    for _ in range(130):
        text = sentence(lexicon, grammar_counts, lexical_counts)[0]
        print(text)
    print(grammar_counts)
    print(lexical_counts)
    print(sum(grammar_counts.values()) + sum(lexical_counts.values()))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment