Created
January 19, 2022 08:42
-
-
Save dwd/e2012c803b733314284ca2d4b1b1719e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional
class WordList:
    """Candidate word list and feedback state for a Wordle-style solver.

    The list is loaded from a text file (one word per line) and narrowed
    after every guess by feeding the game's feedback to ``next``.

    Feedback strings have one character per position:

    * ``'.'``: the guessed letter at this position was rejected;
    * upper-case letter: that letter is fixed at this position;
    * lower-case letter: that letter is in the word, but not at this position.
    """

    # Number of letters in every word and in every feedback string.
    WORD_LENGTH = 5

    words: list[str]            # remaining candidate words
    last: Optional[str]         # most recent guess, None before the first one
    mask: list[str]             # per position: fixed letter, or '.' if unknown
    known: set[str]             # letters known to occur in the answer
    wrong: set[str]             # letters known not to occur in the answer
    blocked: list[set[str]]     # per position: letters ruled out at that spot
    init_lf: dict[str, int]     # letter frequencies of the unfiltered list
    init_count: int             # number of words in the unfiltered list

    def __init__(self, filename: str, *, allow_repeats: bool = False):
        """Load the candidate words from *filename*.

        Only lines that are exactly ``WORD_LENGTH`` lower-case ASCII letters
        are kept.

        Args:
            filename: Path of a text file with one word per line.
            allow_repeats: Keep words that contain a repeated letter. The
                default (False) preserves the original heuristic of skipping
                them.
                NOTE(review): a reader reported that assuming the game never
                uses repeated letters is false; consider passing True.
        """
        self.words = []
        self.last = None
        self.mask = ['.' for _ in range(self.WORD_LENGTH)]
        self.known = set()
        self.wrong = set()
        self.blocked = [set() for _ in range(self.WORD_LENGTH)]
        with open(filename) as f:
            for line in f:
                word = line.strip()
                if len(word) != self.WORD_LENGTH:
                    continue
                if not (word.isascii() and word.isalpha()):
                    continue
                if word != word.lower():
                    continue
                if not allow_repeats and len(set(word)) != len(word):
                    continue
                self.words.append(word)
        # Snapshot of the full list's statistics; score() does not use it
        # at present.
        self.init_lf, self.init_count = self.letter_freq()

    def letter_freq(self) -> tuple[dict[str, int], int]:
        """Count, for each letter, how many current words contain it.

        A letter is counted once per word even if the word repeats it.

        Returns:
            A ``(frequencies, word_count)`` tuple for the current list.
        """
        freq: dict[str, int] = {}
        for word in self.words:
            # dict.fromkeys de-duplicates while keeping a deterministic order.
            for letter in dict.fromkeys(word):
                freq[letter] = freq.get(letter, 0) + 1
        return freq, len(self.words)

    def process_result(self, result: str) -> None:
        """Fold one feedback string into the solver state.

        The whole string is validated and all updates are computed before
        any state is changed, so a rejected result leaves the solver
        untouched.

        A ``'.'`` normally marks the guessed letter as absent. If that letter
        is known to be in the word (because it is marked elsewhere in this
        result or was learned earlier), it is only ruled out at this
        position; this is what a grey duplicate letter means.

        Args:
            result: Feedback string, see the class docstring for the format.

        Raises:
            ValueError: If the length is wrong, a character is neither a
                letter nor ``'.'``, a marked letter was previously recorded
                as absent, or an upper-case letter contradicts a position
                that is already fixed.
        """
        if len(result) != self.WORD_LENGTH:
            raise ValueError(f"Length of result must be {self.WORD_LENGTH}")

        # Pass 1: validate, and collect every letter the result marks as
        # present (needed up front because a '.' may precede its duplicate).
        marked: set[str] = set()
        for pos, c in enumerate(result):
            if c == '.':
                continue
            if not c.isalpha():
                raise ValueError("Mask must contain lower/upper letters or dots")
            letter = c.lower()
            if letter in self.wrong:
                raise ValueError("Mask contains wrong letters")
            if c.upper() == c and self.mask[pos] not in (letter, '.'):
                raise ValueError("Mask wrong?")
            marked.add(letter)

        # Pass 2: work out the updates without touching any state yet.
        present = self.known | marked
        absent: set[str] = set()
        fixed: list[tuple[int, str]] = []
        ruled_out: list[tuple[int, str]] = []
        for pos, c in enumerate(result):
            if c == '.':
                if self.last is None:
                    continue
                guessed = self.last[pos]
                if guessed in present:
                    # Grey duplicate: the letter exists, just not here.
                    ruled_out.append((pos, guessed))
                else:
                    absent.add(guessed)
            elif c.upper() == c:
                fixed.append((pos, c.lower()))
            else:
                ruled_out.append((pos, c))

        # Pass 3: commit.
        self.wrong |= absent
        self.known |= marked
        for pos, letter in fixed:
            self.mask[pos] = letter
        for pos, letter in ruled_out:
            self.blocked[pos].add(letter)

    def _is_candidate(self, word: str) -> bool:
        """Return True if *word* is consistent with everything learned so far."""
        # Once WORD_LENGTH distinct letters are known, no other letter can occur.
        letters_complete = len(self.known) >= self.WORD_LENGTH
        for pos, c in enumerate(word):
            if self.mask[pos] != '.' and c != self.mask[pos]:
                return False
            if c in self.wrong or c in self.blocked[pos]:
                return False
            if letters_complete and c not in self.known:
                return False
        return all(letter in word for letter in self.known)

    def filter(self) -> None:
        """Drop every word that contradicts the current mask and letter sets."""
        self.words = [word for word in self.words if self._is_candidate(word)]

    def score(self) -> list[tuple[str, int]]:
        """Rank the current words by how common their letters are.

        Each distinct letter of a word contributes the fraction of current
        words containing it; the sum is scaled by 1000 and truncated to int.

        Returns:
            ``(word, score)`` pairs, highest score first; ties keep list order.
        """
        letter_freq, count = self.letter_freq()
        scored_words = []
        for word in self.words:
            total = 0
            # Distinct letters only, matching how letter_freq() counts.
            for letter in dict.fromkeys(word):
                total += letter_freq[letter] / count
            total *= 1000
            scored_words.append((word, int(total)))
        scored_words.sort(key=lambda pair: pair[1], reverse=True)
        return scored_words

    def guess(self) -> str:
        """Return the highest-scoring remaining word.

        Raises:
            IndexError: If no candidate words remain.
        """
        ranked = self.score()
        if not ranked:
            raise IndexError("No candidate words remain")
        return ranked[0][0]

    def next(self, guess: str, result: str) -> str:
        """Record *guess* and its feedback, narrow the list, suggest a word.

        Args:
            guess: The word that was played.
            result: The feedback for it, see the class docstring.

        Returns:
            The next suggested guess.

        Raises:
            ValueError: If *result* is malformed (see ``process_result``).
            IndexError: If no candidate words remain after filtering.
        """
        self.last = guess
        self.process_result(result)
        self.filter()
        return self.guess()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I got scuppered once by this false assumption