Created
February 4, 2025 20:01
-
-
Save gmjen/d722a9d5e5224f3b214ee11a64ba7129 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
class WordleSolver: | |
"""A class for playing a Wordle Style game. Designed around building a simple | |
solver for the Irish language version of the game Foclach. | |
Object must be instantiated with a dataframe having the column structure as follows: | |
word chars 1 2 3 4 5 ... [letter_tokens] | |
Usage: instantiate a game using the corpus | |
>>> g = WordleGame(corpus_df) | |
>>> guess = g.get_next_guess(); guess | |
# input the guess into the game and get the clues in return | |
# you can either use an array of ['gray', 'yellow', 'green'] | |
# or you can use a string. # If using a string, the key is: | |
# g = green (letter exists in correct location) | |
# y = yellow (letter exists but not in this location) | |
# r = gray (letter does not exists in the word) | |
>>> g.update_clues_and_guess('gyrrr') | |
>>> guess = g.get_next_guess(); guess | |
... | |
:class: WordleGame | |
:param words_df: the corpus, as a dataframe structured with columns as | |
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
word 1 2 3 4 5 ... [letters] ... score | |
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - | |
and each row as a token | |
word 1 2 3 4 5 a c á ... score | |
srian s r i a n True False False ... 0.1 | |
hairc h a i r c True True False ... 0.001 | |
chiar c h i a r True True False ... 0.2 | |
The final score column is used to create the probability distribution | |
of tokens from which the guess will be selected | |
""" | |
def __init__(self, words_df): | |
self.current_masks = [] | |
self.guesses = [] | |
self.guesses_and_results = [] | |
# Ensure score column is numeric when initializing | |
words_df['score'] = pd.to_numeric(words_df['score'], errors='coerce').fillna(0) | |
self.corpus = words_df | |
self.filtered_corpus = words_df | |
def make_guess(self): | |
# Convert to float and handle any potential issues | |
scores = self.filtered_corpus['score'].astype(float) | |
total_score = scores.sum() | |
if total_score == 0: | |
# If all scores are 0, use uniform distribution | |
probabilities = np.ones(len(scores)) / len(scores) | |
else: | |
probabilities = scores / total_score | |
guess_idx = np.random.choice(self.filtered_corpus.index, p=probabilities.values) | |
return self.filtered_corpus.loc[guess_idx]['word'] | |
def filter_db_from_masks(self): | |
if len(self.current_masks) > 0: | |
mask = np.array(self.current_masks).prod(axis=0).astype(np.bool_) | |
self.filtered_corpus = self.corpus[mask] | |
return self.filtered_corpus | |
def next_best_guesses(self): | |
return self.filtered_corpus | |
def masks_from_guess(self, guess, list_of_colors): | |
# takes a word and list of colors, the pandas dataframe (db), and the | |
# list of known masks, if it exists | |
# outputs a mask to filter the words based on | |
# the constraints resulting from that guess | |
# input of list of colors can either be a list of ['Gray', 'Green', 'Yellow'] | |
# or a string of ['GYR'], where R is gray | |
db = self.corpus | |
for idx, l in enumerate(guess): | |
if list_of_colors[idx].lower() in ["g", "green"]: | |
print(f'{l} g') | |
# if it's green then the letter in position idx is correct | |
# which means the word has the letter, and it's in that position | |
position = str(idx+1) # this is the string position to reference the db | |
self.current_masks.append(db[position] == l) | |
self.current_masks.append(db[l] == True) | |
elif list_of_colors[idx].lower() in ["y", "yellow"]: | |
print(f'{l} y') | |
# here the letter is in the secret word, but | |
# the letter is not in the right position | |
position = str(idx+1) # this is the string position to reference the db | |
# so we know that the letter at this position is not this letter | |
self.current_masks.append(db[position] != l) | |
# and we know that the letter is in the word | |
self.current_masks.append(db[l] == True) | |
elif list_of_colors[idx].lower() in ["r", "gray", "grey"]: | |
print(f'{l} r') | |
# here the letter is not in the secret word | |
self.current_masks.append(db[l] == False) | |
return self.current_masks | |
def list_possible_guesses(self): | |
filtered_db = self.filter_db_from_masks() | |
return filtered_db['word'] | |
def get_next_guess(self): | |
filtered_db = self.filter_db_from_masks() | |
guess = self.make_guess() | |
self.guesses.append(guess) | |
return guess | |
def update_clues_and_guess(self, clues, guess=None): | |
if guess is None: | |
guess = self.guesses[-1] | |
self.masks_from_guess(guess, clues) | |
@staticmethod | |
def evaluate_guess(guess, secret_word): | |
# evaluates a guess against the secret_word and outputs | |
# the list of colors for that word | |
colors = [] | |
for idx, l in enumerate(guess): | |
if secret_word[idx] == l: | |
v = 'Green' | |
elif l in secret_word: | |
v = 'Yellow' | |
else: | |
v = 'Gray' | |
colors.append(v) | |
return colors |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment