Skip to content

Instantly share code, notes, and snippets.

@dwd
Created January 19, 2022 08:42
Show Gist options
  • Save dwd/e2012c803b733314284ca2d4b1b1719e to your computer and use it in GitHub Desktop.
Save dwd/e2012c803b733314284ca2d4b1b1719e to your computer and use it in GitHub Desktop.
from typing import Optional
class WordList:
    """Candidate-word tracker and guesser for a five-letter Wordle-style game.

    The instance holds the list of words still consistent with every result
    seen so far, plus the constraints derived from those results.

    Result strings are five characters long, one per guess position:

    * ``.``            -- the guessed letter at this position was rejected;
    * lower-case letter -- the letter is in the answer, but not at this position;
    * upper-case letter -- the letter is in the answer at exactly this position.
    """

    # Length of every word, guess and result string.
    WORD_LENGTH = 5

    # Words still consistent with all processed results.
    words: list[str]
    # Most recent guess passed to next(), or None before the first guess.
    last: Optional[str]
    # Per-position confirmed letter, '.' where the position is still unknown.
    mask: list[str]
    # Letters known to occur somewhere in the answer.
    known: set[str]
    # Letters known not to occur in the answer at all.
    wrong: set[str]
    # Per-position letters known NOT to be at that position.
    blocked: list[set[str]]
    # Letter frequencies / word count of the unfiltered list, captured once at
    # construction time. Not used by the current scoring.
    init_lf: dict[str, int]
    init_count: int

    def __init__(self, filename: str, *, allow_repeats: bool = True):
        """Load candidate words from *filename* (one word per line).

        Only lower-case, ASCII, purely alphabetic five-letter words are kept.

        Args:
            filename: Path of the word-list file.
            allow_repeats: Keep words containing a repeated letter. Real
                answers can contain repeated letters, so this defaults to
                True; pass False to restrict the list to words whose letters
                are all distinct.
        """
        self.last = None
        self.mask = ['.'] * self.WORD_LENGTH
        self.known = set()
        self.wrong = set()
        self.blocked = [set() for _ in range(self.WORD_LENGTH)]
        # errors="replace" turns undecodable bytes into U+FFFD, which is not
        # ASCII, so such lines are skipped instead of aborting the load.
        with open(filename, encoding="utf-8", errors="replace") as f:
            stripped = (line.strip() for line in f)
            self.words = [
                word for word in stripped
                if self._is_playable(word, allow_repeats)
            ]
        self.init_lf, self.init_count = self.letter_freq()

    @classmethod
    def _is_playable(cls, word: str, allow_repeats: bool) -> bool:
        """Return True if *word* is an acceptable candidate word."""
        if len(word) != cls.WORD_LENGTH:
            return False
        if not (word.isascii() and word.isalpha() and word.islower()):
            return False
        return allow_repeats or len(set(word)) == len(word)

    def letter_freq(self) -> tuple[dict[str, int], int]:
        """Count, for each letter, how many remaining words contain it.

        A letter occurring several times in one word is counted once for
        that word.

        Returns:
            A ``(frequencies, word_count)`` pair, where ``word_count`` is the
            number of remaining words.
        """
        letter_freq: dict[str, int] = {}
        for word in self.words:
            for letter in set(word):
                letter_freq[letter] = letter_freq.get(letter, 0) + 1
        return letter_freq, len(self.words)

    def process_result(self, result: str) -> None:
        """Fold the feedback for the last guess into the constraints.

        The whole result string is validated before any state is modified,
        so a rejected result leaves the constraints untouched.

        Args:
            result: Five-character result string (see the class docstring).

        Raises:
            ValueError: If the result has the wrong length, contains a
                character other than an ASCII letter or a dot, names a letter
                already known to be absent, or contradicts a position that
                was already confirmed.
        """
        if len(result) != self.WORD_LENGTH:
            raise ValueError("Length of result must be 5")

        # Pass 1: validation only.
        for pos, c in enumerate(result):
            if c == '.':
                continue
            if not (c.isascii() and c.isalpha()):
                raise ValueError("Mask must contain lower/upper letters or dots")
            if c.lower() in self.wrong:
                raise ValueError("Mask contains wrong letters")
            if c.isupper() and self.mask[pos] not in (c.lower(), '.'):
                raise ValueError("Mask wrong?")

        # Letters this result reports as being somewhere in the answer.
        present = {c.lower() for c in result if c != '.'}
        guess = self.last.lower() if self.last is not None else None

        # Pass 2: apply.
        for pos, c in enumerate(result):
            if c == '.':
                if guess is None:
                    continue
                rejected = guess[pos]
                if rejected in present or rejected in self.known:
                    # The letter is in the answer (it is marked elsewhere),
                    # so a dot here only rules it out at this position.
                    self.blocked[pos].add(rejected)
                else:
                    self.wrong.add(rejected)
                continue
            letter = c.lower()
            if c.isupper():
                self.mask[pos] = letter
            else:
                self.blocked[pos].add(letter)
            self.known.add(letter)

    def _is_consistent(self, word: str) -> bool:
        """Return True if *word* violates none of the current constraints."""
        for pos, c in enumerate(word):
            if self.mask[pos] != '.' and c != self.mask[pos]:
                return False
            if c in self.wrong or c in self.blocked[pos]:
                return False
        # Every letter known to be in the answer must appear in the word.
        # Letter multiplicities are not tracked, so this check is deliberately
        # conservative: it never removes a word that could be the answer.
        return all(letter in word for letter in self.known)

    def filter(self) -> None:
        """Drop every remaining word that contradicts the constraints."""
        self.words = [word for word in self.words if self._is_consistent(word)]

    def score(self) -> list[tuple[str, int]]:
        """Rank the remaining words by how common their letters are.

        A word's score is the sum, over its distinct letters, of the fraction
        of remaining words containing that letter, scaled by 1000 and
        truncated to an int.

        Returns:
            ``(word, score)`` pairs sorted by descending score; words with
            equal scores keep their word-list order.
        """
        letter_freq, count = self.letter_freq()
        scored_words = []
        for word in self.words:
            total = 0.0
            # dict.fromkeys de-duplicates while keeping the letter order, so a
            # repeated letter is not rewarded twice and the float sum is
            # evaluated in a deterministic order.
            for letter in dict.fromkeys(word):
                total += letter_freq[letter] / count
            scored_words.append((word, int(total * 1000)))
        scored_words.sort(key=lambda pair: pair[1], reverse=True)
        return scored_words

    def guess(self) -> str:
        """Return the highest-scoring remaining word.

        Raises:
            IndexError: If no candidate words remain.
        """
        ranked = self.score()
        if not ranked:
            raise IndexError("No candidate words remain")
        return ranked[0][0]

    def next(self, guess: str, result: str) -> str:
        """Record *guess* and its *result*, then return the next guess.

        Args:
            guess: The five-letter word that was played (case-insensitive).
            result: The result string for that guess (see the class docstring).

        Raises:
            ValueError: If *guess* is not five characters long or *result* is
                rejected by process_result(); the previous guess is restored
                in the latter case.
            IndexError: If no candidate words remain after filtering.
        """
        if len(guess) != self.WORD_LENGTH:
            raise ValueError("Length of guess must be 5")
        previous = self.last
        # Candidate words are lower-case, so store the guess the same way.
        self.last = guess.lower()
        try:
            self.process_result(result)
        except Exception:
            self.last = previous
            raise
        self.filter()
        return self.guess()
@Fishbowler
Copy link

Wordle appears to only use words that have no repeated letters

I got scuppered once by this false assumption

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment