Last active
January 24, 2022 21:38
-
-
Save madewokherd/0813c7793879919d71af538eddafc6e9 to your computer and use it in GitHub Desktop.
Wordle optimizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import collections | |
import math | |
import string | |
import sys | |
WORD_LENGTH = 5 | |
VALID_ONLY = False | |
HARD_MODE = False | |
WORST_GUESS = False | |
DORDLE = False | |
USE_ENTROPY = False | |
#COMMON_WORDS_FILE = '/usr/share/dict/words' | |
#COMMON_WORDS_FILE = 'all-words' | |
#EXTRA_WORDS_FILE = 'all-words' | |
#COMMON_WORDS_FILE = 'common-words' | |
#EXTRA_WORDS_FILE = 'extra-words' | |
COMMON_WORDS_FILE = '/usr/share/dict/words' | |
EXTRA_WORDS_FILE = '/dev/null' | |
# constraint kinds: | |
POS_MATCH = 0 # character at index {num} of the word is this letter | |
POS_NOMATCH = 1 # character at index {num} of the word is not this letter, AND at least one of this letter appears in the word | |
LETTER_MIN = 2 # at least {num} of this letter appears in the word, num >= 2 | |
LETTER_MAX = 3 # at most {num} of this letter appears in the word | |
constraint = collections.namedtuple('constraint', ['kind', 'letter', 'num']) | |
def all_constraints(word_length=WORD_LENGTH): | |
for i in range(word_length): | |
for letter in string.ascii_lowercase: | |
if i: | |
yield constraint(LETTER_MIN, letter, i+1) | |
yield constraint(LETTER_MAX, letter, i) | |
yield constraint(POS_MATCH, letter, i) | |
yield constraint(POS_NOMATCH, letter, i) | |
def test_constraint(con, word): | |
kind, letter, num = con | |
if kind == LETTER_MIN: | |
return word.count(letter) >= num | |
if kind == LETTER_MAX: | |
return word.count(letter) <= num | |
if kind == POS_MATCH: | |
return word[num] == letter | |
if kind == POS_NOMATCH: | |
return word[num] != letter and letter in word | |
all_words = set() | |
with open(COMMON_WORDS_FILE, 'r') as f: | |
all_letters = frozenset(string.ascii_lowercase) # avoid words with upper-case letters as they may be names | |
for line in f: | |
word = line.strip() | |
if len(word) == WORD_LENGTH and all_letters.issuperset(word): | |
all_words.add(word.lower()) | |
possible_words = all_words.copy() | |
guessable_words = all_words.copy() | |
with open(EXTRA_WORDS_FILE, 'r') as f: | |
all_letters = frozenset(string.ascii_lowercase) # avoid words with upper-case letters as they may be names | |
for line in f: | |
word = line.strip() | |
if len(word) == WORD_LENGTH and all_letters.issuperset(word): | |
guessable_words.add(word.lower()) | |
if DORDLE: | |
possible_words2 = possible_words.copy() | |
guessable_words2 = possible_words.copy() | |
words_by_constraint = {} | |
for con in all_constraints(): | |
kind, letter, num = con | |
matching_words = set() | |
for word in guessable_words if (VALID_ONLY or HARD_MODE) else possible_words: | |
if test_constraint(con, word): | |
matching_words.add(word) | |
words_by_constraint[con] = matching_words | |
def constraints_from_guess(guess_word, actual_word): | |
result = [] | |
for letter in set(guess_word): | |
guessed_count = guess_word.count(letter) | |
actual_count = actual_word.count(letter) | |
if guessed_count > actual_count: | |
# if we guessed more of this letter than appear in the actual word, we know the exact amount | |
yield LETTER_MAX, letter, actual_count | |
if actual_count >= 2: | |
yield LETTER_MIN, letter, actual_count | |
if actual_count: | |
for i in range(len(guess_word)): | |
if guess_word[i] == letter: | |
if actual_word[i] == letter: | |
yield POS_MATCH, letter, i | |
else: | |
yield POS_NOMATCH, letter, i | |
if guessed_count <= actual_count and guessed_count >= 2: | |
yield LETTER_MIN, letter, guessed_count | |
def get_possibility_sets(word, possible_words): | |
remaining_words = possible_words.copy() | |
while remaining_words: | |
possibility_set = remaining_words.copy() | |
actual_word = remaining_words.pop() | |
constraint_set = frozenset(constraints_from_guess(word, actual_word)) | |
for con in constraint_set: | |
possibility_set.intersection_update(words_by_constraint[con]) | |
assert actual_word in possibility_set | |
yield possibility_set | |
remaining_words.difference_update(possibility_set) | |
if USE_ENTROPY: | |
def score_word(word, possible_words): | |
entropy = 0.0 | |
for i in get_possibility_sets(word, possible_words): | |
probability = len(i) / len(possible_words) | |
entropy += probability * math.log2(probability) | |
return entropy, word not in possible_words | |
else: | |
def score_word(word, possible_words): | |
return max(len(x) for x in get_possibility_sets(word, possible_words)), word not in possible_words | |
def add_constraint(con, possible_words, guessable_words): | |
possible_words.intersection_update(words_by_constraint[con]) | |
if VALID_ONLY or (HARD_MODE and con.kind != LETTER_MAX): | |
guessable_words.intersection_update(words_by_constraint[con]) | |
def process_constraint_string(s): | |
# string format: | |
# Gln - Letter l is in position n | |
# Yln - Letter l is in the word but not in position n | |
# Lln - Letter l appears at least n times in the word, n >= 2 | |
# Uln - Letter l appears at most n times in the word (Ux0 would mean x does not appear in the word) | |
chars_to_kind = {'G': POS_MATCH, 'Y': POS_NOMATCH, 'L': LETTER_MIN, 'U': LETTER_MAX} | |
i = 0 | |
for token in s.split(): | |
if token == '/': | |
i = 1 | |
continue | |
kind = chars_to_kind[token[0]] | |
letter = token[1].lower() | |
num = int(token[2:]) | |
if i: | |
add_constraint(constraint(kind, letter, num), possible_words2, guessable_words2) | |
else: | |
add_constraint(constraint(kind, letter, num), possible_words, guessable_words) | |
process_constraint_string(' '.join(sys.argv[1:])) | |
while True: | |
print('Remaining words:', len(possible_words)) | |
if len(possible_words) <= 32: | |
print(' '.join(possible_words)) | |
if DORDLE: | |
print('Remaining words(2):', len(possible_words2)) | |
if len(possible_words2) <= 32: | |
print(' '.join(possible_words2)) | |
if len(possible_words2) == 1 and not WORST_GUESS: | |
# We found it! | |
print('Found(2):', list(possible_words2)[0]) | |
DORDLE = False | |
if len(possible_words) == 1 and not WORST_GUESS: | |
# We found it! | |
print('Found:', list(possible_words)[0]) | |
if DORDLE: | |
DORDLE = False | |
possible_words = possible_words2 | |
guessable_words = guessable_words2 | |
else: | |
sys.exit() | |
best_word = 'resin' | |
if WORST_GUESS: | |
best_word_score = 0, False | |
elif DORDLE: | |
best_word_score = len(possible_words) + len(possible_words2), 2 | |
else: | |
best_word_score = len(possible_words), True # score by the minimum possible remaining words after making this guess | |
for word in (guessable_words|guessable_words2) if DORDLE else guessable_words: | |
score = score_word(word, possible_words) | |
if DORDLE: | |
score2 = score_word(word, possible_words2) | |
score = score[0]+score2[0], score[1]+score2[1] | |
if (score > best_word_score) if WORST_GUESS else (score < best_word_score): | |
best_word = word | |
best_word_score = score | |
#print(word, score) | |
print('Guess:', best_word) | |
if USE_ENTROPY: | |
print('Entropy:', -best_word_score[0], '' if best_word_score[1] else '*') | |
else: | |
print('Remaining words after this guess:', best_word_score[0], '' if best_word_score[1] else '*') | |
process_constraint_string(input('Enter further constraints: ')) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment