Skip to content

Instantly share code, notes, and snippets.

@Anaphory
Created March 2, 2021 13:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Anaphory/7e55a7834488164e26dfb75fc1442fd7 to your computer and use it in GitHub Desktop.
Save Anaphory/7e55a7834488164e26dfb75fc1442fd7 to your computer and use it in GitHub Desktop.
Small Python script to generate simple sentences for a synthetic language with minimal concatenative morphology
import bisect
import random
from collections import Counter

import numpy
class DiscreteProbabilities:
    """Sampler over a finite set of outcomes with non-negative weights.

    The weights do not need to sum to one; they are normalised on
    construction.  Outcomes are stored from heaviest to lightest together
    with their normalised cumulative weights, so a draw is a single
    uniform random number followed by a binary search.

    Attributes are kept under their original names:
    ``keys`` is the tuple of outcomes (heaviest first) and ``values`` is a
    float numpy array of cumulative probabilities ending in exactly 1.0.
    """

    def __init__(self, ps):
        """Build the cumulative table from a mapping ``outcome -> weight``.

        Raises:
            ValueError: if ``ps`` is empty, contains a negative weight, or
                its weights do not sum to a positive number.
        """
        if not ps:
            raise ValueError("ps must contain at least one outcome")
        keys, values = zip(*sorted(ps.items(), key=lambda kv: kv[1], reverse=True))
        # Sorted descending, so the last weight is the smallest one.
        if values[-1] < 0:
            raise ValueError("weights must be non-negative")
        # Force a float accumulator: with integer weights the cumulative
        # sum would be an integer array that cannot hold the normalised
        # fractions.
        cumulative = numpy.cumsum(values, dtype=float)
        total = cumulative[-1]
        # "not total > 0" also rejects NaN totals.
        if not total > 0:
            raise ValueError("weights must sum to a positive number")
        self.values = cumulative / total
        self.keys = keys

    def generate(self):
        """Return one outcome drawn at random according to the weights."""
        # x lies in [0, 1) and the last cumulative value is exactly 1.0,
        # so the insertion point is always a valid index into self.keys.
        x = numpy.random.random()
        i = bisect.bisect(self.values, x)
        return self.keys[i]
def phonemes():
    """Generate the phoneme inventory of a simlang.

    Returns a dict with two samplers: ``"c"`` draws from the consonant
    list below, where the consonant at 1-based position ``r`` has weight
    ``1 / r``; ``"v"`` draws from four vowels with the fixed weights
    given in the literal.
    """
    consonants = (
        "p", "b", "t", "d", "c", "k", "g", "'", "f",
        "s", "h", "m", "n", "ŋ", "w", "l", "r", "j",
    )
    # Pair every consonant with the reciprocal of its rank in the list.
    ranks = range(1, len(consonants) + 1)
    consonant_weights = dict(zip(consonants, (1 / rank for rank in ranks)))

    vowel_weights = {"a": 0.5, "ə": 0.33333333, "i": 0.25, "u": 0.2}

    inventory = {}
    inventory["c"] = DiscreteProbabilities(consonant_weights)
    inventory["v"] = DiscreteProbabilities(vowel_weights)
    return inventory
def morphemes(inventory):
    """Invent the morpheme tables of a simlang from its phoneme inventory.

    ``inventory`` maps ``"c"`` and ``"v"`` to samplers exposing
    ``generate()``.  Each table is a ``DiscreteProbabilities`` whose
    outcomes are tuples of phonemes, with the form drawn ``r``-th given
    weight ``1.0 / r``.

    Returned keys: ``"n"`` and ``"v"`` (1000 draws each, shape c-v-c-v),
    ``"ng"`` and ``"vg"`` (10 draws each, shape c-v), ``"o"`` and ``"og"``
    (100 draws each, shape c-v).

    Forms are drawn independently, so the same form can come up more than
    once; a repeated form occupies a single entry whose weight is the one
    from its latest draw, and a table can therefore hold fewer entries
    than the number of draws.
    """

    def draw_table(shape, size):
        # One phoneme per slot in `shape`, drawn left to right, for every
        # rank from 1 to `size`.
        weights = {}
        for rank in range(1, size + 1):
            form = tuple(inventory[slot].generate() for slot in shape)
            weights[form] = 1.0 / rank
        return DiscreteProbabilities(weights)

    # The tables are built in this fixed order because every draw consumes
    # random numbers from the same global generator.
    tables = {}
    tables["n"] = draw_table("cvcv", 1000)
    tables["ng"] = draw_table("cv", 10)
    tables["v"] = draw_table("cvcv", 1000)
    tables["vg"] = draw_table("cv", 10)
    tables["o"] = draw_table("cv", 100)
    tables["og"] = draw_table("cv", 100)
    return tables
def sentence(morphemes, g=Counter(), l=Counter()):
    """Assemble one random sentence and tally the phonemes it uses.

    The sentence has between 3 and 9 words.  For each word a class is
    picked uniformly from a six-entry list containing ``"o"`` three times,
    ``"n"`` twice and ``"v"`` once; a stem is drawn from
    ``morphemes[cls]`` and, with probability one half, an affix from
    ``morphemes[cls + "g"]`` is appended.  Every word is followed by a
    space and the sentence is closed with ``"."``.

    ``g`` receives the phonemes of the affixes and ``l`` those of the
    stems.  Their defaults are created once, when the function is
    defined, so calls that omit them keep adding to the same two
    counters.

    Returns the tuple ``(text, g, l)``.
    """
    word_classes = ("o", "o", "o", "n", "n", "v")
    symbols = []
    word_count = numpy.random.randint(3, 10)
    for _ in range(word_count):
        cls = word_classes[numpy.random.randint(6)]
        stem = morphemes[cls].generate()
        # The coin flip happens after the stem is drawn and before the affix.
        affix = morphemes[cls + "g"].generate() if numpy.random.random() < 0.5 else ()
        symbols.extend(stem + affix + (" ",))
        l.update(stem)
        g.update(affix)
    text = "".join(symbols) + "."
    return text, g, l
# Driver: invent a language, print 130 random sentences, then print the
# phoneme tallies for affixes (g) and stems (l) and their combined total.
inventory = phonemes()
# Keep the generated tables under their own name so that the morphemes()
# factory is not overwritten by its result.
lexicon = morphemes(inventory)
# Explicit accumulators: the totals printed below no longer depend on
# state hidden in sentence()'s default arguments, and they exist even if
# the loop body never runs.
g = Counter()
l = Counter()
for _ in range(130):
    s, g, l = sentence(lexicon, g, l)
    print(s)
print(g)
print(l)
print(sum(g.values()) + sum(l.values()))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment