Created
January 22, 2022 03:36
Star
You must be signed in to star a gist
wordl brute-force (best run with pypy) (there is no CLI sorry)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# don't @ me | |
import collections | |
import random | |
import re | |
import string | |
LENGTH = 5 | |
class Datum: | |
__slots__ = ('min', 'max', 'positions') | |
def __init__(self): | |
self.min = 0 | |
self.max = LENGTH | |
self.positions = [None] * LENGTH | |
class Knowledge(list): | |
__slots__ = () | |
def __init__(self): | |
super().__init__() | |
for l in range(26): | |
self.append(Datum()) | |
@classmethod | |
def from_guess(cls, guess, target): | |
self = cls() | |
for i, letter in enumerate(guess): | |
data = self[letter - 97] | |
if target[i] == letter: | |
data.positions[i] = True | |
elif letter in target: | |
data.positions[i] = False | |
else: | |
data.max = 0 | |
data.positions = [False] * LENGTH | |
for letter in frozenset(guess): | |
our_count = guess.count(letter) | |
target_count = target.count(letter) | |
if our_count > target_count: | |
# We got at least one "no", so we know exactly how many there are | |
self[letter - 97].min = target_count | |
self[letter - 97].max = target_count | |
else: | |
# All we know is that there are at least as many as we guessed | |
self[letter - 97].min = our_count | |
self.think() | |
return self | |
@classmethod | |
def from_real_guess(cls, guess, results): | |
self = cls() | |
for i, (letter, result) in enumerate(zip(guess, results)): | |
data = self[letter - 97] | |
if result is True: | |
data.min += 1 | |
data.positions[i] = True | |
elif result is None: | |
data.min += 1 | |
data.positions[i] = False | |
elif result is False: | |
data.max = data.min | |
if data.max == 0: | |
data.positions = [False] * LENGTH | |
self.think() | |
return self | |
def think(self): | |
# If the sum of all the minimum placements is the length of the word, then we have all the | |
# letters, just not yet in the correct order | |
min_letters = sum(data.min for data in self) | |
if min_letters > LENGTH: | |
import pprint; pprint.pprint(self) | |
raise ValueError | |
elif min_letters == LENGTH: | |
# Cool beans, set everyone's max to min, and eliminate all other letters | |
for letter in range(26): | |
data = self[letter] | |
data.max = data.min | |
if data.max == 0: | |
data.positions = [False] * LENGTH | |
else: | |
# Otherwise, the max for every letter is the number of ambiguous spaces left | |
for letter in range(26): | |
data = self[letter] | |
remaining = LENGTH - min_letters + data.min | |
if remaining < data.max: | |
data.max = remaining | |
# If any letter must go in at least N places, and only N places are left for it to go, then | |
# we know where it must go | |
for letter in range(26): | |
data = self[letter] | |
possible_positions = LENGTH - data.positions.count(False) | |
if data.min > possible_positions: | |
import pprint; pprint.pprint(self) | |
raise ValueError | |
elif data.min == possible_positions: | |
data.max = data.min | |
for i in range(LENGTH): | |
if data.positions[i] is not False: | |
data.positions[i] = True | |
def merge(self, other): | |
for letter in range(26): | |
data = self[letter] | |
other_data = other[letter] | |
data.min = max(data.min, other_data.min) | |
data.max = min(data.max, other_data.max) | |
for i in range(LENGTH): | |
pos = other_data.positions[i] | |
if pos is not None: | |
data.positions[i] = pos | |
self.think() | |
def matches(self, word): | |
for i in range(LENGTH): | |
data = self[word[i] - 97] | |
if data.positions[i] is False: | |
return False | |
for letter in range(26): | |
data = self[letter] | |
count = word.count(letter + 97) | |
if not data.min <= count <= data.max: | |
return False | |
return True | |
words = set() | |
with open('/usr/share/dict/words') as f: | |
for line in f: | |
word = line.strip().lower() | |
if re.fullmatch('[a-z]{' + str(LENGTH) + '}', word): | |
words.add(word.lower().encode('ascii')) | |
wordlist = list(sorted(words)) | |
total_knowledge = Knowledge() | |
#total_knowledge.merge(Knowledge.from_real_guess(b'ceremonials', (None, False, None, False, False, False, None, None, None, True, False))) | |
#total_knowledge.merge(Knowledge.from_real_guess(b'bricklaying', (False, True, True, None, False, None, None, None, None, None, False))) | |
total_knowledge.merge(Knowledge.from_real_guess(b'arose', (False, False, False, False, False))) | |
#total_knowledge.merge(Knowledge.from_real_guess(b'linty', (False, False, False, False, True))) | |
#total_knowledge.merge(Knowledge.from_real_guess(b'champ', (False, False, False, False, None))) | |
#total_knowledge.merge(Knowledge.from_real_guess(b'guppy', (False, True, None, False, True))) | |
remaining_words = [word for word in words if total_knowledge.matches(word)] | |
def choose_guess(wordlist, remaining_words, total_knowledge): | |
best_score = 0 | |
best_guesses = [] | |
if len(remaining_words) == 1: | |
# We're done! | |
wordl, = remaining_words | |
print(wordl.decode('ascii')) | |
return | |
# For each possible guess we could make here, calculate a score, which is the fewest number of | |
# candidates it might eliminate across ALL possible games. Then pick the highest scoring guess. | |
for possible_guess in wordlist: | |
# TODO skip guesses that cannot possibly provide new information, e.g. those made entirely of | |
# letters we already know are not present? | |
score = None | |
for possible_target in remaining_words: | |
knowledge = Knowledge.from_guess(possible_guess, possible_target) | |
knowledge.merge(total_knowledge) | |
eliminations = 0 | |
for candidate in remaining_words: | |
if not knowledge.matches(candidate): | |
eliminations += 1 | |
if score is None or eliminations < score: | |
score = eliminations | |
if score < best_score: | |
# We cannot possibly do better with this guess, so stop here | |
break | |
if score > best_score: | |
best_score = score | |
best_guesses = [possible_guess] | |
elif score == best_score: | |
best_guesses.append(possible_guess) | |
print("best score:", best_score, best_guesses) | |
# Prefer guesses that might themselves be the word, if possible | |
for guess in best_guesses: | |
if guess in remaining_words: | |
return guess | |
# Otherwise, idk, use the first one | |
return best_guesses[0] | |
print(choose_guess(wordlist, remaining_words, total_knowledge)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment