Strilanc/find_wordle_quintuples.py

## find_wordle_quintuples.py
from typing import Iterable, Tuple

import time
import numpy as np


def to_mask(word: str) -> int:
    """Converts a word into a bitmask where each bit indicates presence of a letter."""
    t = 0
    for c in word:
        t |= 1 << (ord(c) - ord('a'))
    return t


def uniques_to_np32(masks: Iterable[int]) -> np.ndarray:
    return np.array(sorted(set(masks)), dtype=np.uint32)


def main():
    t0 = time.monotonic()

    # Got word list from
    # https://gist.githubusercontent.com/dracos/dd0668f281e685bad51479e5acaadb93/raw/ca9018b32e963292473841fb55fd5a62176769b5/valid-wordle-words.txt
    with open('word_list.txt') as f:
        words = f.read().splitlines()

    # Compute unique single-word masks.
    singlet_masks = uniques_to_np32(to_mask(word) for word in words if len(set(word)) == 5)
    num_singlets = len(singlet_masks)

    # Compute unique double-word masks.
    unions = np.repeat(singlet_masks, repeats=len(singlet_masks)).reshape((num_singlets, num_singlets))
    overlaps = unions.copy()
    for letter in range(num_singlets):
        unions[:, letter] |= singlet_masks[letter]
        overlaps[:, letter] &= singlet_masks[letter]
    unions.shape = (num_singlets * num_singlets,)
    overlaps.shape = (num_singlets * num_singlets,)
    doublet_masks = uniques_to_np32(unions[overlaps == 0])

    # Compute unique double-word-and-one-extra-bit masks.
    known_extrabitdoublets = set()
    for letter in range(26):
        extrabitdoublets = doublet_masks | (1 << letter)
        for k2 in np.flatnonzero(extrabitdoublets != doublet_masks):
            known_extrabitdoublets.add(extrabitdoublets[k2])


    # Search for singlet+doublet mask combos whose leftovers are a double-word-and-an-extra-bit.
    def iter_singlet_doublet_extrabitdoublets() -> Iterable[Tuple[int, int, int]]:
        for singlet in singlet_masks:
            for working_doublet_index in np.flatnonzero((singlet & doublet_masks) == 0):
                doublet = doublet_masks[working_doublet_index]
                extrabitdoublet = (singlet | doublet) ^ ((1 << 26) - 1)
                if extrabitdoublet in known_extrabitdoublets:
                    yield singlet, doublet, extrabitdoublet

    # Have a way to map solved masks back to words.
    mask_to_word = {to_mask(word): word for word in words}

    seen_results = set()
    for singlet, doublet, extrabitdoublet in iter_singlet_doublet_extrabitdoublets():
        # Find location of the doublet in the outer or table. Index tells original two.
        doublet_index = np.flatnonzero(unions == doublet)[0]

        # Find the extra bit and location of the leftover doublet in the outer or table. Index tells original two.
        for letter in range(26):
            extrabit = 1 << letter
            if extrabit & extrabitdoublet:
                doublet2_matches = np.flatnonzero(unions == extrabitdoublet & ~extrabit)
                if len(doublet2_matches):
                    doublet_index2 = doublet2_matches[0]
                    break
        else:
            raise NotImplementedError("extrabitdoublet wasn't a doublet with an extra bit")

        # Convert indices into masks.
        word_masks = [
            singlet_masks[doublet_index // len(singlet_masks)],
            singlet_masks[doublet_index % len(singlet_masks)],
            singlet_masks[doublet_index2 // len(singlet_masks)],
            singlet_masks[doublet_index2 % len(singlet_masks)],
            singlet,
        ]

        # Convert masks into words.
        words = tuple(sorted(mask_to_word[m] for m in word_masks))
        if words not in seen_results:
            seen_results.add(words)
            print("found", words, 'after', time.monotonic() - t0, 'seconds')

    print("finished searching after", time.monotonic() - t0, 'seconds')


if __name__ == '__main__':
    main()
	from typing import Iterable, Tuple

	import time
	import numpy as np


	def to_mask(word: str) -> int:
	"""Converts a word into a bitmask where each bit indicates presence of a letter."""
	t = 0
	for c in word:
	t \|= 1 << (ord(c) - ord('a'))
	return t


	def uniques_to_np32(masks: Iterable[int]) -> np.ndarray:
	return np.array(sorted(set(masks)), dtype=np.uint32)


	def main():
	t0 = time.monotonic()

	# Got word list from
	# https://gist.githubusercontent.com/dracos/dd0668f281e685bad51479e5acaadb93/raw/ca9018b32e963292473841fb55fd5a62176769b5/valid-wordle-words.txt
	with open('word_list.txt') as f:
	words = f.read().splitlines()

	# Compute unique single-word masks.
	singlet_masks = uniques_to_np32(to_mask(word) for word in words if len(set(word)) == 5)
	num_singlets = len(singlet_masks)

	# Compute unique double-word masks.
	unions = np.repeat(singlet_masks, repeats=len(singlet_masks)).reshape((num_singlets, num_singlets))
	overlaps = unions.copy()
	for letter in range(num_singlets):
	unions[:, letter] \|= singlet_masks[letter]
	overlaps[:, letter] &= singlet_masks[letter]
	unions.shape = (num_singlets * num_singlets,)
	overlaps.shape = (num_singlets * num_singlets,)
	doublet_masks = uniques_to_np32(unions[overlaps == 0])

	# Compute unique double-word-and-one-extra-bit masks.
	known_extrabitdoublets = set()
	for letter in range(26):
	extrabitdoublets = doublet_masks \| (1 << letter)
	for k2 in np.flatnonzero(extrabitdoublets != doublet_masks):
	known_extrabitdoublets.add(extrabitdoublets[k2])


	# Search for singlet+doublet mask combos whose leftovers are a double-word-and-an-extra-bit.
	def iter_singlet_doublet_extrabitdoublets() -> Iterable[Tuple[int, int, int]]:
	for singlet in singlet_masks:
	for working_doublet_index in np.flatnonzero((singlet & doublet_masks) == 0):
	doublet = doublet_masks[working_doublet_index]
	extrabitdoublet = (singlet \| doublet) ^ ((1 << 26) - 1)
	if extrabitdoublet in known_extrabitdoublets:
	yield singlet, doublet, extrabitdoublet

	# Have a way to map solved masks back to words.
	mask_to_word = {to_mask(word): word for word in words}

	seen_results = set()
	for singlet, doublet, extrabitdoublet in iter_singlet_doublet_extrabitdoublets():
	# Find location of the doublet in the outer or table. Index tells original two.
	doublet_index = np.flatnonzero(unions == doublet)[0]

	# Find the extra bit and location of the leftover doublet in the outer or table. Index tells original two.
	for letter in range(26):
	extrabit = 1 << letter
	if extrabit & extrabitdoublet:
	doublet2_matches = np.flatnonzero(unions == extrabitdoublet & ~extrabit)
	if len(doublet2_matches):
	doublet_index2 = doublet2_matches[0]
	break
	else:
	raise NotImplementedError("extrabitdoublet wasn't a doublet with an extra bit")

	# Convert indices into masks.
	word_masks = [
	singlet_masks[doublet_index // len(singlet_masks)],
	singlet_masks[doublet_index % len(singlet_masks)],
	singlet_masks[doublet_index2 // len(singlet_masks)],
	singlet_masks[doublet_index2 % len(singlet_masks)],
	singlet,
	]

	# Convert masks into words.
	words = tuple(sorted(mask_to_word[m] for m in word_masks))
	if words not in seen_results:
	seen_results.add(words)
	print("found", words, 'after', time.monotonic() - t0, 'seconds')

	print("finished searching after", time.monotonic() - t0, 'seconds')


	if __name__ == '__main__':
	main()