Skip to content

Instantly share code, notes, and snippets.

@tylerneylon
Created June 2, 2021 01:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tylerneylon/0eca66eb0b4a4165f2e72530bdd1cc03 to your computer and use it in GitHub Desktop.
Save tylerneylon/0eca66eb0b4a4165f2e72530bdd1cc03 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
""" some_random_words.py
Usage:
./some_random_words.py [letters_start_w_key_letter]
Prints out a bunch of random words that use these letters,
and which include the key letter.
"""
import random
import sys
from collections import Counter
CHEAT_MODE = True
WORD_LEN = 20
def load_bigram_model(letters):
    """Build an add-one-smoothed letter-bigram model from the system word list.

    Words are read from /usr/share/dict/words, stripped and lower-cased.
    When CHEAT_MODE is on, only words spelled entirely from `letters` and
    containing `letters[0]` (the key letter) contribute to the counts.

    Each word is padded with '_' on both sides, so '_' acts as both the
    start-of-word and the end-of-word marker.

    Returns a dict mapping each symbol in a-z plus '_' to a list of
    (next_symbol, probability) pairs covering that same alphabet, where the
    probabilities for one symbol sum to 1 (up to float rounding).
    """
    with open('/usr/share/dict/words') as word_file:
        vocabulary = [entry.strip().lower() for entry in word_file]

    if CHEAT_MODE:
        def usable(candidate):
            # Every character must be one of the permitted letters ...
            for ch in candidate:
                if ch not in letters:
                    return False
            # ... and the key letter has to appear somewhere.
            return letters[0] in candidate

        vocabulary = list(filter(usable, vocabulary))

    bigram_counter = Counter()
    print('Analyzing all words .. ', end='', flush=True)
    for entry in vocabulary:
        padded = '_' + entry + '_'
        # Count every adjacent pair of symbols in the padded word.
        bigram_counter.update(
            padded[pos:pos + 2] for pos in range(len(padded) - 1)
        )

    alphabet = 'abcdefghijklmnopqrstuvwxyz_'

    # model[L] is a list of the form [(M, prob_of_M_given_L), ...]
    model = {}
    for first in alphabet:
        # The +1 is add-one smoothing: no transition ends up with probability 0.
        smoothed = [bigram_counter[first + second] + 1 for second in alphabet]
        denom = sum(smoothed)
        model[first] = [
            (second, count / denom)
            for second, count in zip(alphabet, smoothed)
        ]

    print('done!')
    return model
def get_random_next_letter(this_letter, next_set):
    """Draw one letter at random from a (letter, probability) list.

    Args:
        this_letter: The letter being extended. It is not used by the
            sampling itself and is kept only for interface compatibility.
        next_set: Non-empty list of (letter, probability) pairs whose
            probabilities are expected to sum to 1.

    Returns:
        The letter of the first pair at which the running probability total
        reaches the random draw.

    Raises:
        IndexError: If `next_set` is empty.
    """
    u = random.random()
    running_total = 0.0
    for letter, prob in next_set:
        running_total += prob
        if running_total >= u:
            return letter
    # Float rounding (or an under-normalised list) can leave the total just
    # below the draw; fall back to the last entry instead of indexing past
    # the end of the list.
    return next_set[-1][0]
def filter_to_ok_letters(ok_letters, next_set):
    """Restrict a (letter, probability) list to permitted letters.

    Pairs whose letter is not in `ok_letters` are dropped, and the remaining
    probabilities are rescaled by their sum so that they add up to 1 again.
    The original order of the surviving pairs is kept.
    """
    allowed = []
    for letter, prob in next_set:
        if letter in ok_letters:
            allowed.append((letter, prob))

    mass = sum(prob for _, prob in allowed)

    renormalized = []
    for letter, prob in allowed:
        renormalized.append((letter, prob / mass))
    return renormalized
def generate_random_word(ok_letters, next_let_hist):
    """Generate one non-empty random word from the bigram model.

    Starting from the '_' boundary marker, letters are drawn one at a time
    from the distribution that follows the previous letter, restricted to
    `ok_letters`. The word ends when the '_' marker is drawn again.

    Args:
        ok_letters: String of letters the word may contain.
        next_let_hist: Mapping from a letter (or '_') to a list of
            (next_letter, probability) pairs.

    Returns:
        The generated word, without the surrounding '_' markers.
    """
    while True:
        letters = ['_']
        while True:
            # The end marker is not offered for the very first draw, so a
            # word normally has at least one letter.
            allowed = ok_letters if len(letters) == 1 else ok_letters + '_'
            next_set = filter_to_ok_letters(allowed, next_let_hist[letters[-1]])
            letters.append(get_random_next_letter(letters[-1], next_set))
            if letters[-1] == '_':
                break

        word = ''.join(letters[1:-1])
        # An empty word is still possible if `ok_letters` itself contains
        # '_'. Retry with the same arguments; the previous recursive retry
        # dropped `ok_letters` and raised TypeError.
        if word:
            return word
if __name__ == '__main__':
    # With no argument, or an empty one, there is no key letter to work
    # with: show the usage text and stop.
    if len(sys.argv) < 2 or not sys.argv[1]:
        print(__doc__)
        sys.exit(0)

    # The dictionary words are lower-cased when the model is built, so the
    # requested letters are lower-cased too; otherwise nothing would match.
    letters = sys.argv[1].lower()

    # The model only knows a-z. A key letter outside that range can never be
    # generated, which would make the retry loop below spin forever.
    if not all('a' <= let <= 'z' for let in letters):
        sys.exit(f'Letters must all be in a-z; got {sys.argv[1]!r}')

    # By convention the first letter given is the one every word must contain.
    key_letter = letters[0]

    next_let_hist = load_bigram_model(letters)

    for _ in range(500):
        word = ''
        # Keep drawing until the word contains the key letter.
        while key_letter not in word:
            word = generate_random_word(letters, next_let_hist)
        print(word)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment