Skip to content

Instantly share code, notes, and snippets.

@SmileyChris
Created August 30, 2018 22:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save SmileyChris/01329ace51df56653abb80c8044bbe21 to your computer and use it in GitHub Desktop.
Save SmileyChris/01329ace51df56653abb80c8044bbe21 to your computer and use it in GitHub Desktop.
import random
from typing import Any, Iterable, List, Optional, Tuple
# Letter clusters used by new_sylable() when it builds a non-simple syllable.

# Multi-letter consonant groups that may open a syllable.
consonant_initial_digraphs = {"ch", "sh", "th", "thr", "ph", "wh", "ck", "kn", "wr"}

# Multi-letter consonant groups that may close a syllable.
consonant_final_digraphs = {"ch", "ng", "sh", "th", "tch"}

# Kept as a list (not a set) because it is sampled directly with random.choice.
vowel_digraphs = [
    "ai", "au", "aw", "ay",
    "ea", "ee", "ei", "ew",
    "ie",
    "oa", "oo", "ou", "ow",
    "y",  # not digraph, but special case
]

# Two-consonant blends that may open a syllable.
consonant_initial_blends = {
    # s-blends
    "sc", "sm", "st", "sk", "sn", "sw", "sl", "sp",
    # l-blends
    "bl", "gl", "cl", "pl", "fl",
    # r-blends
    "br", "fr", "tr", "cr", "gr", "dr", "pr",
}

# Two-consonant blends that may close a syllable.
consonant_final_blends = {
    "sk", "sp", "st",
    "ld", "lf", "lk", "lp", "lt",
    "nd", "nk", "nt",
    "ft", "mp", "pt", "rt",
}
# Relative weight of each lowercase letter; used as sampling weights for the
# vowel and consonant pickers built from this table.
weighted_letters = {
    "a": 8.167, "b": 1.492, "c": 2.782, "d": 4.253,
    "e": 12.702, "f": 2.228, "g": 2.015, "h": 6.094,
    "i": 6.966, "j": 0.153, "k": 0.772, "l": 4.025,
    "m": 2.406, "n": 6.749, "o": 7.507, "p": 1.929,
    "q": 0.095, "r": 5.987, "s": 6.327, "t": 9.056,
    "u": 2.758, "v": 0.978, "w": 2.360, "x": 0.150,
    "y": 1.974, "z": 0.074,
}
class WeightedRandomizer:
    """Pick items at random with probability proportional to their weight.

    The constructor accepts any iterable of ``(item, weight)`` pairs (a list
    or a generator). Pairs are ordered by ascending weight and stored with a
    running cumulative total, which ``random()`` scans to find the pick.
    """

    def __init__(self, weighted_items: Iterable[Tuple[Any, float]]) -> None:
        """Build the cumulative-weight table from ``(item, weight)`` pairs."""
        self._max = 0.0  # total of all weights seen so far
        self._weights: List[Tuple[float, Any]] = []  # (cumulative ceiling, item)
        for item, weight in sorted(weighted_items, key=lambda pair: pair[1]):
            self._max += weight
            self._weights.append((self._max, item))

    def random(self) -> Any:
        """Return one item, chosen with probability proportional to its weight.

        Raises:
            IndexError: if no item can be selected (no items were supplied, or
                the weights leave nothing selectable, e.g. all zero). This
                mirrors ``random.choice`` on an empty sequence instead of
                silently returning ``None``.
        """
        r = random.random() * self._max
        for ceil, item in self._weights:
            # The first ceiling strictly above r owns the slot r landed in;
            # zero-weight items share a ceiling with their predecessor and
            # are therefore never selected.
            if ceil > r:
                return item
        raise IndexError("cannot choose from a WeightedRandomizer with no selectable items")

    def __str__(self) -> str:
        """Return the string forms of all items, sorted and concatenated."""
        return "".join(sorted(str(item) for _, item in self._weights))
# Weighted pickers shared by the generator functions below.

# Single vowels, weighted by their entry in weighted_letters.
simple_vowels = WeightedRandomizer(
    [(letter, weight) for letter, weight in weighted_letters.items() if letter in "aoeiu"]
)

# Every remaining letter (including "y"), weighted the same way.
simple_consonants = WeightedRandomizer(
    [(letter, weight) for letter, weight in weighted_letters.items() if letter not in "aoeiu"]
)

# Sentence terminators: mostly full stops, occasionally "?" or "!".
punctuation = WeightedRandomizer(
    [
        (".", 0.9),
        ("?", 0.07),
        ("!", 0.03),
    ]
)
def new_sylable(force_hard: bool = False) -> Tuple[str, bool]:
    """Generate one random pseudo-English syllable.

    Args:
        force_hard: When true, the syllable always starts with a consonant
            (the vowel-first structure is excluded).

    Returns:
        A ``(syllable, open_sylable)`` tuple. ``open_sylable`` is true when
        the syllable has no final consonant, when its final consonant is
        "w", "h" or "y", or when a trailing "e" was appended. ``new_word``
        feeds this flag back in as ``force_hard`` for the next syllable.
    """
    # 8 times out of 9 use a single weighted vowel, otherwise a vowel digraph.
    if random.randint(0, 8):
        vowel = simple_vowels.random()
    else:
        vowel = random.choice(vowel_digraphs)

    # 0 = consonant + vowel, 1 = consonant + vowel + consonant,
    # 2 = vowel + consonant (not available when force_hard is set).
    structure = random.randint(0, 1) if force_hard else random.randint(0, 2)

    # A single 0-9 roll decides both ends, so at most one of them uses a
    # digraph/blend: 0 -> complex start, 1 -> complex end, else both simple.
    consonant_simplicity = random.randint(0, 9)
    simple_initial_consonant = consonant_simplicity != 0
    simple_final_consonant = consonant_simplicity != 1

    start = ""
    if structure in (0, 1):
        if simple_initial_consonant:
            start = simple_consonants.random()
        else:
            # Sort the union before sampling: set iteration order for strings
            # varies between interpreter runs (hash randomization), which
            # would make output unreproducible even under random.seed().
            start = random.choice(
                sorted(consonant_initial_digraphs | consonant_initial_blends)
            )

    end = ""
    open_sylable = structure == 0
    if structure in (1, 2):
        if simple_final_consonant:
            end = simple_consonants.random()
            open_sylable = end[-1] in ("w", "h", "y")
            if not open_sylable:
                # One time in five, add a trailing "e" and treat the syllable
                # as open.
                # NOTE(review): the source's indentation was lost; the "e"
                # append is assumed to apply only on this branch (not to
                # w/h/y endings) -- confirm against the original.
                open_sylable = not random.randint(0, 4)
                if open_sylable:
                    end += "e"
        else:
            end = random.choice(
                sorted(consonant_final_digraphs | consonant_final_blends)
            )

    return start + vowel + end, open_sylable
def new_word(length: Optional[int] = None, plural: Optional[bool] = None) -> str:
    """Build a random word by chaining syllables from ``new_sylable``.

    Args:
        length: Number of syllables; a random value from 1 to 3 when ``None``.
        plural: Whether to append "s" (skipped if the word already ends in
            "s"); decided at random, one time in five, when ``None``.

    Returns:
        The generated word.
    """
    syllable_count = random.randint(1, 3) if length is None else length
    make_plural = (not random.randint(0, 4)) if plural is None else plural

    parts = []
    # Each syllable reports whether it ended "open"; that flag is passed to
    # the next call as its force_hard argument.
    next_force_hard = False
    for _ in range(syllable_count):
        part, next_force_hard = new_sylable(next_force_hard)
        parts.append(part)

    word = "".join(parts)
    if make_plural and not word.endswith("s"):
        word += "s"
    return word
def new_text(paragraphs: int) -> List[str]:
    """Generate ``paragraphs`` paragraphs of random filler text.

    A fixed vocabulary is created first (its size grows with ``paragraphs``
    up to a cap of 40) and every sentence draws from it, so words repeat
    across the text.

    Args:
        paragraphs: Number of paragraphs to produce.

    Returns:
        One string per paragraph, each holding 1 to 5 sentences joined by
        single spaces.
    """
    complexity = min(40, 20 * paragraphs // 3)

    # One-syllable, never-plural words.
    short = [new_word(length=1, plural=False) for _ in range(complexity // 2)]

    # Two- or three-syllable entries, sometimes capitalised and sometimes
    # prefixed with one of the first three short words.
    longer = []
    for _ in range(complexity):
        entry = new_word(random.randint(2, 3))
        if random.randint(0, 6) == 0:
            entry = entry.capitalize()
        if random.randint(0, 2) == 0:
            prefix = short[random.randint(0, 2)]
            entry = prefix + " " + entry
        longer.append(entry)

    # Three candidate words used next to quoted sentences, with weights 0, 4, 8.
    # NOTE(review): the first candidate has weight 0 and so is never picked --
    # confirm whether that is intended.
    quote_candidates = []
    for i in range(3):
        quote_candidates.append((new_word(length=1, plural=False), i * 4))
    quote = WeightedRandomizer(quote_candidates)

    result = []
    for _ in range(paragraphs):
        sentence_count = random.randint(1, 5)
        result.append(
            " ".join(new_sentence(short, longer, quote) for _ in range(sentence_count))
        )
    return result
def _upper_first(text: str) -> str:
    """Upper-case only the first character of ``text``, leaving the rest as is."""
    return text[:1].upper() + text[1:]


def new_sentence(short: List[str], longer: List[str], quote: WeightedRandomizer) -> str:
    """Assemble one random sentence from the supplied vocabulary.

    The sentence has 4 to 8 picks from ``short`` / ``longer`` followed by a
    weighted-random punctuation mark. One time in nine it is wrapped in double
    quotes and paired with a two-word phrase made from a ``longer`` entry and
    a ``quote`` word (in random order), placed either after or before the
    quotation.

    Args:
        short: Pool of short words.
        longer: Pool of longer words/phrases; entries may already contain
            capital letters (``new_text`` capitalises some of them).
        quote: Randomizer supplying the extra word used next to a quotation.

    Returns:
        The finished sentence.
    """
    longer_chance = 0.4
    words = []
    for _ in range(random.randint(4, 8)):
        # NOTE(review): a roll *above* longer_chance selects a longer word,
        # and each longer pick lowers the threshold, making further longer
        # picks more likely. That looks inverted relative to the variable
        # name -- confirm intent before changing; behaviour is kept as is.
        if random.random() > longer_chance:
            longer_chance -= 0.3
            words.append(random.choice(longer))
        else:
            longer_chance += 0.1
            words.append(random.choice(short))
    sentence = " ".join(words) + punctuation.random()

    # Only the first character is upper-cased below. str.capitalize() would
    # also lower-case every other character, wiping out capitalised words
    # that arrive in `longer`.
    if random.randint(0, 8):
        return _upper_first(sentence)

    quote_words = [random.choice(longer), quote.random()]
    random.shuffle(quote_words)
    quote_text = " ".join(quote_words)
    if random.randint(0, 1):
        # Quotation first: a closing full stop becomes a comma inside the
        # quotes and the whole sentence ends with a full stop instead.
        if sentence.endswith("."):
            sentence = "{},".format(sentence[:-1])
        return '"{}" {}.'.format(_upper_first(sentence), quote_text)
    # Phrase first; the quoted sentence keeps its original casing.
    return '{} "{}"'.format(_upper_first(quote_text), sentence)
if __name__ == "__main__":
    # Script entry point: print generated paragraphs separated by blank lines.
    # The optional first command-line argument is the paragraph count; a
    # missing or non-integer argument falls back to 10.
    import sys

    paragraph_count = 10
    if len(sys.argv) > 1:
        try:
            paragraph_count = int(sys.argv[1])
        except ValueError:
            pass
    print("\n\n".join(new_text(paragraph_count)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment