Last active
August 7, 2022 20:51
-
-
Save stevenkaras/dda8167dfd525ed2893121eb1f96a8d9 to your computer and use it in GitHub Desktop.
Wordle Disjoint Set
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# The point of this exercise is to find a list of 5 words with 25 unique letters across them.
# As suggested by https://www.youtube.com/watch?v=_-AfhLQfb6w
#
# This problem is equivalent to searching for a disjoint set in the graph induced on shared letters between words.
# That problem is equivalent to searching for a clique in the inverse graph.
# The approach used here builds the graph as edges first between letters, then between words.
# Each step increases the cliques it found by 1 word.
#
# For the pre-NYT wordle list, there are 10 solutions across 5144 words with unique letters, which induce 4651264 edges in the graph.
# Disjoint sets are as follows: 2=2325632, 3=57262297, 4=12058120, 5=10
# On my 10-year old laptop, it takes about an hour to discover all results
# Loaded list of 5144 words
# Built word edge list with 4651264 edges
# took 4.800 seconds
# First step produced 2325632 candidates
# took 4.129 seconds
# Second step produced 57262297 candidates
# took 275.470 seconds
# Third step produced 12058120 candidates
# took 2420.292 seconds
# Fourth step produced 10 solutions
# took 557.635 seconds
# {('clipt', 'jumby', 'kreng', 'vozhd', 'waqfs'), ('brung', 'kempt', 'vozhd', 'waqfs', 'xylic'), ('bling', 'jumpy', 'treck', 'vozhd', 'waqfs'), ('glent', 'jumby', 'prick', 'vozhd', 'waqfs'), ('jumby', 'pling', 'treck', 'vozhd', 'waqfs'), ('chunk', 'fjord', 'gymps', 'vibex', 'waltz'), ('bemix', 'clunk', 'grypt', 'vozhd', 'waqfs'), ('brick', 'glent', 'jumpy', 'vozhd', 'waqfs'), ('fjord', 'gucks', 'nymph', 'vibex', 'waltz'), ('blunk', 'cimex', 'grypt', 'vozhd', 'waqfs')}
import contextlib
import functools
import time
from collections.abc import Iterable
@contextlib.contextmanager
def timed(*, start=None, title=None):
    """Context manager that prints the wall-clock duration of its body.

    Keyword-only parameters:
        start: optional epoch timestamp (a ``time.time()`` value) to measure
            from; defaults to the moment the context is entered.
        title: optional label prefixed to the printed message.

    Note: nothing is printed if the body raises, since there is no
    try/finally around the yield.
    """
    prefix = "" if title is None else f"{title} "
    begin = time.time() if start is None else start
    yield
    elapsed = time.time() - begin
    print(f"{prefix}took {elapsed:.3f} seconds")
def load_word_list(path) -> set[str]:
    """Load candidate words from *path*: one word per line.

    Keeps only words that are exactly 5 ASCII letters with no repeated
    letter, lowercased, and keeps a single representative per anagram
    class (anagrams cover the same letter set, so one suffices for the
    search).

    Returns the surviving words as a set.
    """
    # Explicit encoding: the platform default is locale-dependent and can
    # fail on perfectly valid UTF-8 word lists.
    with open(path, encoding="utf-8") as f:
        # splitlines() also copes with CRLF line endings, unlike split("\n").
        words = f.read().splitlines()
    words = [word.lower() for word in words]  # lowercase all the words
    words = [word for word in words if word.isalpha() and word.isascii()]  # discard any words with non-ascii letters
    words = [word for word in words if len(word) == 5]  # discard words that aren't 5 letters long
    words = [word for word in words if len(set(word)) == len(word)]  # discard words with repeated letters (e.g. abase)
    # Deduplicate anagrams: keyed by sorted letters, the last word seen for
    # each letter multiset wins.
    words = list({tuple(sorted(word)): word for word in words}.values())
    return set(words)
def words_letters(words: Iterable[str]) -> set[str]:
    """Return the set of distinct letters appearing across all *words*."""
    letters: set[str] = set()
    for word in words:
        letters.update(word)
    return letters
def words_for_word(word: str, all_words: set[str], words_by_letter: dict[str, set[str]]) -> set[str]:
    """Return every word in *all_words* that shares no letter with *word*.

    *words_by_letter* maps each letter to the set of words containing it;
    the union over *word*'s letters is exactly the conflicting words.
    """
    conflicting: set[str] = set().union(*(words_by_letter[letter] for letter in set(word)))
    return all_words - conflicting
def build_words_by_word(all_words: set[str]) -> dict[str, set[str]]:
    """Build the disjointness graph as an adjacency map.

    Maps each word to the set of words that share no letter with it
    (the edges of the inverse "shared letters" graph).
    """
    # Group words under each letter in a single pass over all_words,
    # instead of rescanning the whole word set once per letter.
    words_by_letter: dict[str, set[str]] = {}
    for word in all_words:
        for letter in word:
            words_by_letter.setdefault(letter, set()).add(word)
    return {word: words_for_word(word, all_words, words_by_letter=words_by_letter) for word in all_words}
def search_step(candidates: Iterable[tuple[str]], words_by_word: dict[str, set[str]]) -> set[tuple[str]]:
    """Grow every candidate clique by one word.

    For each candidate tuple, intersect the compatible-word sets of its
    members; every word in that intersection produces a new candidate
    tuple one word larger, kept sorted so duplicates collapse in the set.
    """
    grown: set[tuple[str]] = set()
    for clique in candidates:
        compatible = set.intersection(*(words_by_word[member] for member in clique))
        for addition in compatible:
            grown.add(tuple(sorted((*clique, addition))))
    return grown
def main():
    """CLI entry point: load a word list and run the four expansion steps."""
    import argparse

    parser = argparse.ArgumentParser(
        # "25 unique letters": 5 words x 5 distinct letters each, no overlap.
        description="Find sets of 5 words with 25 unique letters. AKA the wordle set challenge"
    )
    parser.add_argument(
        "path",
        metavar="WORDS",
        # nargs="?" is required for a positional argument's default to take
        # effect; without it the argument is mandatory and default= is ignored.
        nargs="?",
        default="/usr/share/dict/words",
        help="The words file to use. Defaults to /usr/share/dict/words",
    )
    args = parser.parse_args()

    all_words = load_word_list(args.path)
    print(f"Loaded list of {len(all_words)} words")
    with timed():
        words_by_word = build_words_by_word(all_words)
        print(f"Built word edge list with {sum(len(ws) for ws in words_by_word.values())} edges")
    # Each step grows every clique of pairwise-disjoint words by one word;
    # after four steps the singletons have become 5-word solutions.
    with timed():
        step1 = search_step([(word,) for word in all_words], words_by_word=words_by_word)
        print(f"First step produced {len(step1)} candidates")
    with timed():
        step2 = search_step(step1, words_by_word=words_by_word)
        print(f"Second step produced {len(step2)} candidates")
    with timed():
        step3 = search_step(step2, words_by_word=words_by_word)
        print(f"Third step produced {len(step3)} candidates")
    with timed():
        step4 = search_step(step3, words_by_word=words_by_word)
        print(f"Fourth step produced {len(step4)} solutions")
    print(step4)
# Run the search only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment