Created
August 4, 2022 08:47
-
-
Save Strilanc/99dbb2150522974b547b51f659f281e7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Iterable, Tuple | |
import time | |
import numpy as np | |
def to_mask(word: str) -> int: | |
"""Converts a word into a bitmask where each bit indicates presence of a letter.""" | |
t = 0 | |
for c in word: | |
t |= 1 << (ord(c) - ord('a')) | |
return t | |
def uniques_to_np32(masks: Iterable[int]) -> np.ndarray: | |
return np.array(sorted(set(masks)), dtype=np.uint32) | |
def main(): | |
t0 = time.monotonic() | |
# Got word list from | |
# https://gist.githubusercontent.com/dracos/dd0668f281e685bad51479e5acaadb93/raw/ca9018b32e963292473841fb55fd5a62176769b5/valid-wordle-words.txt | |
with open('word_list.txt') as f: | |
words = f.read().splitlines() | |
# Compute unique single-word masks. | |
singlet_masks = uniques_to_np32(to_mask(word) for word in words if len(set(word)) == 5) | |
num_singlets = len(singlet_masks) | |
# Compute unique double-word masks. | |
unions = np.repeat(singlet_masks, repeats=len(singlet_masks)).reshape((num_singlets, num_singlets)) | |
overlaps = unions.copy() | |
for letter in range(num_singlets): | |
unions[:, letter] |= singlet_masks[letter] | |
overlaps[:, letter] &= singlet_masks[letter] | |
unions.shape = (num_singlets * num_singlets,) | |
overlaps.shape = (num_singlets * num_singlets,) | |
doublet_masks = uniques_to_np32(unions[overlaps == 0]) | |
# Compute unique double-word-and-one-extra-bit masks. | |
known_extrabitdoublets = set() | |
for letter in range(26): | |
extrabitdoublets = doublet_masks | (1 << letter) | |
for k2 in np.flatnonzero(extrabitdoublets != doublet_masks): | |
known_extrabitdoublets.add(extrabitdoublets[k2]) | |
# Search for singlet+doublet mask combos whose leftovers are a double-word-and-an-extra-bit. | |
def iter_singlet_doublet_extrabitdoublets() -> Iterable[Tuple[int, int, int]]: | |
for singlet in singlet_masks: | |
for working_doublet_index in np.flatnonzero((singlet & doublet_masks) == 0): | |
doublet = doublet_masks[working_doublet_index] | |
extrabitdoublet = (singlet | doublet) ^ ((1 << 26) - 1) | |
if extrabitdoublet in known_extrabitdoublets: | |
yield singlet, doublet, extrabitdoublet | |
# Have a way to map solved masks back to words. | |
mask_to_word = {to_mask(word): word for word in words} | |
seen_results = set() | |
for singlet, doublet, extrabitdoublet in iter_singlet_doublet_extrabitdoublets(): | |
# Find location of the doublet in the outer or table. Index tells original two. | |
doublet_index = np.flatnonzero(unions == doublet)[0] | |
# Find the extra bit and location of the leftover doublet in the outer or table. Index tells original two. | |
for letter in range(26): | |
extrabit = 1 << letter | |
if extrabit & extrabitdoublet: | |
doublet2_matches = np.flatnonzero(unions == extrabitdoublet & ~extrabit) | |
if len(doublet2_matches): | |
doublet_index2 = doublet2_matches[0] | |
break | |
else: | |
raise NotImplementedError("extrabitdoublet wasn't a doublet with an extra bit") | |
# Convert indices into masks. | |
word_masks = [ | |
singlet_masks[doublet_index // len(singlet_masks)], | |
singlet_masks[doublet_index % len(singlet_masks)], | |
singlet_masks[doublet_index2 // len(singlet_masks)], | |
singlet_masks[doublet_index2 % len(singlet_masks)], | |
singlet, | |
] | |
# Convert masks into words. | |
words = tuple(sorted(mask_to_word[m] for m in word_masks)) | |
if words not in seen_results: | |
seen_results.add(words) | |
print("found", words, 'after', time.monotonic() - t0, 'seconds') | |
print("finished searching after", time.monotonic() - t0, 'seconds') | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment