Skip to content

Instantly share code, notes, and snippets.

@dansanderson
Last active May 21, 2022 07:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dansanderson/f11577824cccc41747a913f5865cde5e to your computer and use it in GitHub Desktop.
Save dansanderson/f11577824cccc41747a913f5865cde5e to your computer and use it in GitHub Desktop.
Finds New York Times Spelling Bee puzzles in a word list
#!/usr/bin/env python3
#
# A tool to generate New York Times Spelling Bee puzzles from a word list.
#
# A Bee is a set of seven unique letters that can be used to form words. One
# of the letters is required to appear at least once in every word, and each
# word must be at least four letters long. Letters can be repeated. A Bee must
# have at least one word in its word list that uses all seven letters at least
# once (a "pangram").
#
# Usage:
# python3 beefinder.py <word-list-file> <required-letter> >data.json
#
# The tool outputs the Bees in JSON format, as follows:
#
# { "s": { // The required letter
# "abcdehs": [ // A Bee, with the letter bank as the key
# "aahs", // Answers in this Bee from the word list
# "hahas",
# "hahs",
# ...
# ], ...
# }}
import collections
import itertools
import json
import os
import sys
ORDA = ord('a')
def make_wordbank(word_seq):
"""Makes a word bank from a sequence of words.
A word bank contains sets of letters and the words from the word list that
can be made from them. Words shorter than 4 letters are ignored, as are
words that require letter banks larger than 7 letters. A given letter bank
may be smaller than 7 letters.
The result is a map of letter banks to lists of words. The letter bank is
*not* a string: it's a 26-bit integer, where each bit represents a letter
the bank, starting with 'a' as the least significant bit. This
representation makes it easy and fast to identify members and make Bees
(see bees()). Use bee_str() to convert this number to a string of letters.
Args:
word_seq: A sequence of words.
Returns:
The word bank, as a mapping of numeric letter banks to lists of words.
"""
bank = collections.defaultdict(list)
for word in word_seq:
# Word must be 4 letters or longer
if len(word) < 4:
continue
bankbits = 0
abort_word = False
for ltr in word:
if ltr < 'a' or ltr > 'z':
abort_word = True
break
bankbits |= 1 << ord(ltr)-ORDA
if abort_word:
continue
# Letterbank must be at most 7 letters
b = bankbits
c = 0
while b > 0:
c += b & 1
b >>= 1
if c > 7:
continue
bank[bankbits].append(word)
return bank
def words_from_file(pth):
"""A generator that yields words from a word list.
Args:
pth: The file path.
Yields:
A word. (Actually a full line with the newline removed.)
"""
with open(pth) as fh:
for line in fh:
word = line.strip()
yield word
def bees(bank, required_letter='s'):
"""Generates Bee puzzles from a word bank.
This scans all combinations of seven letters where one of the seven is
the given required letter, and queries the word bank for words that can be
made with the required letter and any of the six other letters.
Args:
bank: The word bank, in the form returned by make_wordbank().
required_letter: The required letter, as a lowercase single character
string. The default is 's'.
Returns:
The Bees, as a mapping of numeric letter banks to answer lists. (See
make_wordbank() for a description of numeric letter banks.)
"""
bees = collections.defaultdict(list)
reqnum = ord(required_letter)-ORDA
reqbit = 1 << reqnum
for banknums in itertools.combinations(range(25), 6):
bankbits = reqbit
for num in banknums:
if num >= reqnum:
num += 1
bankbits |= 1 << num
for b in bank:
if bankbits & b == b and b & reqbit:
bees[bankbits].extend(bank[b])
# Filter out bees that don't have pangrams
to_delete = []
for b in bees:
has_pangram = False
for word in bees[b]:
if len(set(word)) == 7:
has_pangram = True
break
if not has_pangram:
to_delete.append(b)
for b in to_delete:
del bees[b]
return bees
def bee_str(bee):
"""Converts a 26-bit numeric letter bank to a string letter bank."""
ltrs = []
for n in range(26):
if bee & (1 << n):
ltrs.append(chr(ORDA + n))
return ''.join(ltrs)
def do_usage(progname=None):
print(
f'Makes New York Times Spelling Bee puzzles.\n\n'
f'Usage: {progname or "python3 beefinder.py"} <word-list-file> '
f'<letter> >data.json\n'
f' word-list-file: The path to the word list file.\n'
f' letter: The required Bee letter. Must be between a and z.\n\n'
f'Output is a JSON file of all Bee puzzles based on the word list.',
file=sys.stderr)
sys.exit(1)
def main(args):
if len(args) != 3:
print('Missing required arguments.', file=sys.stderr)
do_usage(args[0])
wordlist_fname = args[1]
if not os.path.isfile(wordlist_fname):
print(f'"{args[1]}" is not a file.', file=sys.stderr)
do_usage(args[0])
reqltr = args[2]
if len(reqltr) != 1 or reqltr < 'a' or reqltr > 'z':
print(
f'"{args[2]}" must be a letter between a and z.',
file=sys.stderr)
do_usage(args[0])
bank = make_wordbank(words_from_file(wordlist_fname))
bs = bees(bank, required_letter=reqltr)
bs_as_letters = {}
for b in bs:
bs_as_letters[bee_str(b)] = bs[b]
print(json.dumps({reqltr: bs_as_letters}))
if __name__ == '__main__':
sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment