Last active
January 29, 2022 14:11
-
-
Save HemersonTacon/30bef6273265e71bfe22050acc070ab5 to your computer and use it in GitHub Desktop.
wordle/term.ooo solver
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import sys
from string import ascii_lowercase
def letter_frequency_builder(words_list):
    """Return, for each ASCII lowercase letter, the share of words containing it.

    Args:
        words_list: A sized collection (list or set) of words.

    Returns:
        A dict mapping every letter in ``ascii_lowercase`` to the fraction of
        words in ``words_list`` that contain that letter at least once.
        An empty corpus yields a frequency of 0.0 for every letter instead of
        raising ``ZeroDivisionError``.
    """
    corpus_length = len(words_list)
    if corpus_length == 0:
        # Nothing to count: avoid dividing by zero and report "never seen".
        return dict.fromkeys(ascii_lowercase, 0.0)
    return {
        # ``letter in word`` is a bool, so the sum counts words, not occurrences.
        letter: sum(letter in word for word in words_list) / corpus_length
        for letter in ascii_lowercase
    }
def word_score(word, letter_frequency):
    """Score a word as the sum of the frequencies of its distinct letters.

    Args:
        word: The candidate word.
        letter_frequency: Mapping from letter to its frequency.

    Returns:
        The sum of ``letter_frequency`` values over the *distinct* characters
        of ``word`` (repeated letters count once). Characters missing from
        ``letter_frequency`` (hyphens, digits, unmapped accents, ...)
        contribute 0 instead of raising ``KeyError``.
    """
    return sum(letter_frequency.get(letter, 0) for letter in set(word))
def best_n_guesses(n, words_list):
    """Pick the ``n`` highest-scoring words of ``words_list``.

    Args:
        n: Maximum number of guesses to return.
        words_list: A sized collection of candidate words.

    Returns:
        A ``(guesses, freq_dict)`` tuple. ``guesses`` holds at most ``n`` words
        in ascending score order (the best guess is last); ``freq_dict`` is the
        letter-frequency table computed from ``words_list``.
        A non-positive ``n`` yields an empty guess list.
    """
    freq_dict = letter_frequency_builder(words_list)
    if n <= 0:
        # ``ranked[-0:]`` would return the whole list, so handle this explicitly.
        return [], freq_dict
    ranked = sorted(words_list, key=lambda word: word_score(word, freq_dict))
    return ranked[-n:], freq_dict
def print_guesses_and_score(words_list):
    """Print the ten best guesses for ``words_list``, best first, with scores.

    Each output line has the form ``<rank> <word> - <score>`` where the rank
    starts at 1.
    """
    top_words, frequencies = best_n_guesses(10, words_list)
    rank = 0
    # best_n_guesses returns the best word last, so walk the list backwards.
    for candidate in reversed(top_words):
        rank += 1
        score = word_score(candidate, frequencies)
        print(f"{rank} {candidate} - {score}")
def filter_word(word, in_position, not_in_position, not_in_word):
    """Tell whether ``word`` is still compatible with the collected hints.

    Args:
        word: Candidate word to check.
        in_position: Dict ``letter -> index`` of green hints (letter is at
            that index).
        not_in_position: Dict ``letter -> index`` of yellow hints (letter is
            in the word, but not at that index).
        not_in_word: Iterable of letters reported as black.

    Returns:
        ``True`` when the word satisfies every hint, ``False`` otherwise.
    """
    # Green hints: the letter must sit exactly at the given index.
    if any(word[position] != letter for letter, position in in_position.items()):
        return False
    # Yellow hints: the letter must not sit at the given index...
    if any(word[position] == letter for letter, position in not_in_position.items()):
        return False

    letters_set = set(word)
    # ...but it must appear somewhere in the word.
    required = set(not_in_position)
    # A guess with a repeated letter can get green/yellow for one copy and
    # black for another. That black only means "no further copies", so a
    # letter known to be present must never be treated as forbidden.
    forbidden = set(not_in_word) - required - set(in_position)
    return required <= letters_set and letters_set.isdisjoint(forbidden)
def filter_words_list(words_list, in_position, not_in_position, not_in_word):
    """Return the set of words from ``words_list`` that satisfy all hints.

    The hint arguments are forwarded unchanged to ``filter_word`` for every
    candidate.
    """
    surviving = set()
    for candidate in words_list:
        if filter_word(candidate, in_position, not_in_position, not_in_word):
            surviving.add(candidate)
    return surviving
def process_guess_and_result(guess, result):
    """Translate a guess and its colour feedback into filtering hints.

    Args:
        guess: The five-letter word that was played (any case).
        result: Five feedback marks, one per letter: ``g`` (green), ``y``
            (yellow) or ``b`` (black), in any case.

    Returns:
        A tuple ``(in_position, not_in_position, not_in_word)``:
        ``in_position`` maps green letters to their index, ``not_in_position``
        maps letters known to be in the word to an index where they are *not*,
        and ``not_in_word`` lists the letters absent from the word.

    Raises:
        ValueError: If ``result`` is not exactly five ``b``/``g``/``y`` marks
            or ``guess`` is not exactly five characters long.
    """
    result = result.lower()
    # Check the length as well as the alphabet: counting only the valid marks
    # would accept inputs such as "ggbbyx".
    if len(result) != 5 or any(mark not in "bgy" for mark in result):
        raise ValueError(f"Invalid result {result}. "
                         f"It must contain exactly five letters and only the letters 'b', 'g' or 'y'.")
    # The word bank is lower-case, so compare against a lower-cased guess.
    normalized_guess = guess.lower()
    if len(normalized_guess) != 5:
        raise ValueError(f"Invalid guess {guess}. "
                         f"It must contain exactly five letters.")

    in_position = {}
    not_in_position = {}
    black_marks = []
    for position, (guess_letter, result_letter) in enumerate(zip(normalized_guess, result)):
        if result_letter == 'g':
            in_position[guess_letter] = position
        elif result_letter == 'y':
            not_in_position[guess_letter] = position
        else:
            black_marks.append((guess_letter, position))

    # Resolve black marks only after every green/yellow mark is known: the
    # black copy of a repeated letter may come before its green copy.
    known_present = set(in_position) | set(not_in_position)
    not_in_word = []
    for guess_letter, position in black_marks:
        if guess_letter in known_present:
            # Surplus copy of a letter that is in the word: it only tells us
            # the letter is not at this index. Keep an existing yellow hint.
            not_in_position.setdefault(guess_letter, position)
        else:
            not_in_word.append(guess_letter)
    return in_position, not_in_position, not_in_word
def build_portuguese_word_bank(words_list):
    """Build the set of usable five-letter words from a Portuguese corpus.

    Tokens are kept when they are exactly five characters long and contain
    none of ``.``, ``ª`` or ``º``. Kept tokens are lower-cased and their
    accented letters are replaced by the plain ASCII letter.

    Args:
        words_list: Iterable of corpus tokens.

    Returns:
        A set of normalized words.
    """
    # Plain letter -> accented variants that collapse onto it.
    accent_groups = {
        "a": "àáâãä",
        "e": "éêëè",
        "i": "íîïì",
        "o": "óôõòö",
        "u": "úûüù",
        "c": "ç",
        "n": "ñ",
    }
    translation_table = str.maketrans({
        accented: plain
        for plain, variants in accent_groups.items()
        for accented in variants
    })
    rejected_marks = ('.', 'ª', 'º')

    word_bank = set()
    for token in words_list:
        if len(token) != 5:
            continue
        if any(mark in token for mark in rejected_marks):
            continue
        word_bank.add(token.lower().translate(translation_table))
    return word_bank
def retrieve_english_resources():
    """Return the NLTK English ``words`` corpus, downloading it if missing.

    Returns:
        The word list produced by ``nltk.corpus.words.words()``.
    """
    from nltk.corpus import words
    try:
        return words.words()
    except LookupError:
        # NLTK raises LookupError when the corpus is not installed locally;
        # fetch it once and retry. Other errors are left to propagate.
        import nltk
        nltk.download("words")
        return words.words()
def retrieve_portuguese_resources():
    """Return the NLTK ``machado`` corpus tokens, downloading it if missing.

    Returns:
        The token list produced by ``nltk.corpus.machado.words()``.
    """
    from nltk.corpus import machado
    try:
        return machado.words()
    except LookupError:
        # NLTK raises LookupError when the corpus is not installed locally;
        # fetch it once and retry. Other errors are left to propagate.
        import nltk
        nltk.download("machado")
        return machado.words()
def new_game(language="english"):
    """Run an interactive solving session on the terminal.

    Prints the best opening guesses, then repeatedly asks for the word that
    was played and the colour feedback received, narrows the candidate list
    and prints new suggestions until the feedback is all green.

    Args:
        language: ``"english"`` or ``"portuguese"``; selects the corpus used
            as word bank. Any other value prints a message and exits.
    """
    if language == "english":
        words_list = retrieve_english_resources()
        # Lower-casing can produce duplicates (e.g. a capitalised and a plain
        # entry); dict.fromkeys drops them while keeping the corpus order.
        updated_words_list = list(dict.fromkeys(
            word.lower() for word in words_list if len(word) == 5
        ))
    elif language == "portuguese":
        words_list = retrieve_portuguese_resources()
        updated_words_list = build_portuguese_word_bank(words_list)
    else:
        print(f"Language {language} is not supported.")
        sys.exit()

    prompt = (
        "\n"
        "Type the attempted word and result.\n"
        "Result must be a five letters indicating the individual letters result:\n"
        "- b for black (letter not in the word);\n"
        "- y for yellow (letter in the word but not in this position);\n"
        "- g for green (letter in the word and in this position);\n"
    )

    result = "BBBBB"
    print_guesses_and_score(words_list=updated_words_list)
    count = 0
    while result.lower() != "ggggg":
        tokens = input(prompt).split()
        if len(tokens) != 2:
            # Ask again instead of crashing on a blank line or extra words.
            print("An error occurred while processing your guess and result: "
                  "expected a word and a result separated by a space.")
            continue
        guess, result = tokens
        try:
            in_position, not_in_position, not_in_word = process_guess_and_result(guess, result)
            updated_words_list = filter_words_list(updated_words_list, in_position, not_in_position, not_in_word)
            if not updated_words_list:
                print("This word is not in my vocabulary D:")
                sys.exit(0)
            print_guesses_and_score(words_list=updated_words_list)
            # Only valid attempts count towards the total.
            count += 1
        except ValueError as e:
            print(f"An error occurred while processing your guess and result: {e}")

    if count < 7:
        print("VICTORY!")
    else:
        print("Looks like this algorithm can be improved :(")
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``language``, restricted to
    ``english`` or ``portuguese``.
    """
    supported_languages = ["english", "portuguese"]
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "language",
        choices=supported_languages,
        help="Language of the new game",
    )
    namespace = cli.parse_args()
    return namespace
# Script entry point: start a game in the language given on the command line.
if __name__ == '__main__':
    new_game(language=parse_args().language)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Requires
nltk
installation: pip install nltk