Wordle Disjoint Set
#!/usr/bin/env python3
# The point of this exercise is to find a list of 5 words with 25 unique letters across them.
# As suggested by https://www.youtube.com/watch?v=_-AfhLQfb6w
#
# This problem is equivalent to searching for an independent set of size 5 in the graph
# whose edges connect words that share a letter (the "disjoint set" of the title).
# That, in turn, is equivalent to searching for a 5-clique in the complement graph.
# The approach used here builds the complement graph explicitly: first an index from each
# letter to the words containing it, then a map from each word to every word sharing
# none of its letters. Each search step extends every candidate clique by one word.
#
# For the pre-NYT wordle list, there are 10 solutions across the 5144 words with unique
# letters, which induce 4651264 edges in the graph.
# Candidate counts by set size: 2=2325632, 3=57262297, 4=12058120, 5=10
# On my 10-year-old laptop, it takes about an hour to discover all results:
# Loaded list of 5144 words
# Built word edge list with 4651264 edges
# took 4.800 seconds
# First step produced 2325632 candidates
# took 4.129 seconds
# Second step produced 57262297 candidates
# took 275.470 seconds
# Third step produced 12058120 candidates
# took 2420.292 seconds
# Fourth step produced 10 solutions
# took 557.635 seconds
# {('clipt', 'jumby', 'kreng', 'vozhd', 'waqfs'), ('brung', 'kempt', 'vozhd', 'waqfs', 'xylic'), ('bling', 'jumpy', 'treck', 'vozhd', 'waqfs'), ('glent', 'jumby', 'prick', 'vozhd', 'waqfs'), ('jumby', 'pling', 'treck', 'vozhd', 'waqfs'), ('chunk', 'fjord', 'gymps', 'vibex', 'waltz'), ('bemix', 'clunk', 'grypt', 'vozhd', 'waqfs'), ('brick', 'glent', 'jumpy', 'vozhd', 'waqfs'), ('fjord', 'gucks', 'nymph', 'vibex', 'waltz'), ('blunk', 'cimex', 'grypt', 'vozhd', 'waqfs')}
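#
# A quick illustration of the compatibility test behind the complement graph's edges,
# using words from the solutions above:
#   set("fjord") & set("chunk") == set()       # no shared letters: edge, can co-exist
#   set("fjord") & set("vozhd") == {"o", "d"}  # shared letters: no edge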
import contextlib
import functools
import time
from collections.abc import Iterable


@contextlib.contextmanager
def timed(*, start=None, title=None):
    if title is None:
        title = ""
    else:
        title += " "
    if start is None:
        start = time.time()
    yield
    ended = time.time()
    print(f"{title}took {ended - start:.3f} seconds")


def load_word_list(path) -> set[str]:
    with open(path) as f:
        words = f.read().split("\n")
    words = [word.lower() for word in words]  # lowercase all the words
    words = [word for word in words if word.isalpha() and word.isascii()]  # keep only ASCII-alphabetic words
    words = [word for word in words if len(word) == 5]  # discard words that aren't 5 letters long
    words = [word for word in words if len(set(word)) == len(word)]  # discard words with repeated letters (e.g. abase)
    words = list({tuple(sorted(word)): word for word in words}.values())  # discard anagrams
    return set(words)
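
# Note: the dict comprehension above keeps one arbitrary representative per letter
# signature; e.g. sorted("least") and sorted("tales") both give
# ['a', 'e', 'l', 's', 't'], so only one word from each anagram group survives.
# Any solution found later can be expanded by substituting anagrams back in.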


def words_letters(words: Iterable[str]) -> set[str]:
    return set(letter for word in words for letter in word)


def words_for_word(word: str, all_words: set[str], words_by_letter: dict[str, set[str]]) -> set[str]:
    letters = set(word)
    words_with_shared = set()
    for letter in letters:
        words_with_shared |= words_by_letter[letter]
    return all_words - words_with_shared
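
# In graph terms, words_for_word(w, ...) is w's neighbourhood in the complement
# graph: every word that shares no letter with w. Note that w itself is always
# excluded, since w trivially shares all of its own letters.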


def build_words_by_word(all_words: set[str]) -> dict[str, set[str]]:
    all_letters = set(letter for word in all_words for letter in word)
    words_by_letter = {letter: set(word for word in all_words if letter in word) for letter in all_letters}
    words_by_word = {word: words_for_word(word, all_words, words_by_letter=words_by_letter) for word in all_words}
    return words_by_word
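
# Sanity check on the numbers above: words_by_word stores each compatible pair twice,
# once per endpoint, so the 4651264 directed entries correspond to 2325632 unordered
# pairs, which is exactly the first step's candidate count.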


def search_step(candidates: Iterable[tuple[str, ...]], words_by_word: dict[str, set[str]]) -> set[tuple[str, ...]]:
    results = set()
    for candidate in candidates:
        next_steps = [words_by_word[word] for word in candidate]
        next_words = functools.reduce(lambda a, b: a & b, next_steps)
        for next_word in next_words:
            results.add(tuple(sorted(list(candidate) + [next_word])))
    return results
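
# Illustration of the intersection step with made-up neighbour sets: only words
# compatible with *every* word already in the candidate survive, and sorting the
# extended tuple canonicalizes it so the same set is never counted twice.
assert functools.reduce(lambda a, b: a & b, [{"w", "x"}, {"x", "y"}]) == {"x"}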


def main():
    import argparse

    parser = argparse.ArgumentParser(
        description="Find sets of 5 words with 25 unique letters. AKA the wordle set challenge"
    )
    parser.add_argument(
        "path",
        metavar="WORDS",
        nargs="?",  # make the positional optional so the default actually applies
        default="/usr/share/dict/words",
        help="The words file to use. Defaults to /usr/share/dict/words",
    )
    args = parser.parse_args()

    all_words = load_word_list(args.path)
    print(f"Loaded list of {len(all_words)} words")
    with timed():
        words_by_word = build_words_by_word(all_words)
        print(f"Built word edge list with {sum(len(ws) for ws in words_by_word.values())} edges")
    with timed():
        step1 = search_step([(word,) for word in all_words], words_by_word=words_by_word)
        print(f"First step produced {len(step1)} candidates")
    with timed():
        step2 = search_step(step1, words_by_word=words_by_word)
        print(f"Second step produced {len(step2)} candidates")
    with timed():
        step3 = search_step(step2, words_by_word=words_by_word)
        print(f"Third step produced {len(step3)} candidates")
    with timed():
        step4 = search_step(step3, words_by_word=words_by_word)
        print(f"Fourth step produced {len(step4)} solutions")
    print(step4)


if __name__ == "__main__":
    main()