# HemersonTacon/wordle_solver.py

## wordle_solver.py
from string import ascii_lowercase
import argparse


def letter_frequency_builder(words_list):
    """Compute, for each lowercase ASCII letter, the share of words containing it.

    A letter is counted at most once per word, so every value is the fraction
    of entries in ``words_list`` in which the letter appears at least once.

    Args:
        words_list: Sized collection of words (a list or a set of strings).

    Returns:
        dict mapping every letter of ``ascii_lowercase`` to its frequency.
        For an empty ``words_list`` every frequency is ``0.0``.
    """
    corpus_length = len(words_list)
    if corpus_length == 0:
        # Nothing to count; avoid dividing by zero.
        return {letter: 0.0 for letter in ascii_lowercase}

    # Single pass over the corpus instead of one pass per letter.
    counts = dict.fromkeys(ascii_lowercase, 0)
    for word in words_list:
        for letter in set(word):
            if letter in counts:
                counts[letter] += 1
    return {letter: count / corpus_length for letter, count in counts.items()}


def word_score(word, letter_frequency):
    """Score a word by summing the frequency of each distinct letter in it.

    Repeated letters count once, so words made of many different frequent
    letters score highest.

    Args:
        word: The word to score.
        letter_frequency: dict mapping a letter to its frequency.

    Returns:
        The sum of the frequencies of the distinct characters of ``word``.
    """
    # Characters absent from the table (digits, hyphens, unmapped accents, ...)
    # contribute nothing instead of raising KeyError.
    return sum(letter_frequency.get(letter, 0) for letter in set(word))


def best_n_guesses(n, words_list):
    """Return the ``n`` highest-scoring words and the frequency table used.

    Args:
        n: Maximum number of words to return.
        words_list: Collection of candidate words.

    Returns:
        Tuple ``(guesses, freq_dict)``. ``guesses`` is a list sorted by
        ascending score (the best guess is last) holding at most ``n`` words;
        it is empty when ``n`` is not positive. ``freq_dict`` is the letter
        frequency table computed from ``words_list``.
    """
    freq_dict = letter_frequency_builder(words_list)
    ranked = sorted(words_list, key=lambda word: word_score(word, freq_dict))
    # ``ranked[-n:]`` returns the whole list for n == 0 and slices from the
    # wrong end for negative n, so non-positive n is handled explicitly.
    top = ranked[-n:] if n > 0 else []
    return top, freq_dict


def print_guesses_and_score(words_list):
    """Print up to ten suggested guesses, best first, with rank and score."""
    top_words, frequencies = best_n_guesses(10, words_list)
    # best_n_guesses puts the best word last, so walk the list backwards.
    for rank, candidate in enumerate(reversed(top_words), start=1):
        score = word_score(candidate, frequencies)
        print(f"{rank} {candidate} - {score}")


def filter_word(word, in_position, not_in_position, not_in_word):
    """Tell whether ``word`` is still a possible answer given the feedback.

    Args:
        word: Candidate word.
        in_position: dict ``letter -> index`` of letters known to sit at that
            index (green).
        not_in_position: dict ``letter -> index`` of letters known to be in
            the answer but not at that index (yellow).
        not_in_word: Iterable of letters reported as absent (black).

    Returns:
        True when ``word`` satisfies every constraint, False otherwise.
    """
    if any(word[position] != letter for letter, position in in_position.items()):
        return False
    if any(word[position] == letter for letter, position in not_in_position.items()):
        return False

    letters_set = set(word)
    required = set(not_in_position)
    # A guess with a repeated letter can get black for one copy and
    # green/yellow for another.  Such a letter is in the answer, so it must
    # not be treated as forbidden (that would reject every candidate).
    forbidden = set(not_in_word) - required - set(in_position)
    return required <= letters_set and letters_set.isdisjoint(forbidden)


def filter_words_list(words_list, in_position, not_in_position, not_in_word):
    """Return the set of words from ``words_list`` that pass ``filter_word``."""
    survivors = set()
    for candidate in words_list:
        if filter_word(candidate, in_position, not_in_position, not_in_word):
            survivors.add(candidate)
    return survivors


def process_guess_and_result(guess, result):
    """Translate a guess and its colour feedback into filtering constraints.

    Args:
        guess: The five-letter word that was tried (case-insensitive).
        result: Five characters, one per letter of ``guess``: 'g' (green),
            'y' (yellow) or 'b' (black); case-insensitive.

    Returns:
        Tuple ``(in_position, not_in_position, not_in_word)``:
        ``in_position`` maps each green letter to its index,
        ``not_in_position`` maps each yellow letter to its index, and
        ``not_in_word`` lists the black letters that are not also green or
        yellow elsewhere in the guess.  Both dicts are keyed by letter, so a
        letter that is green (or yellow) at several indexes keeps only the
        last one.

    Raises:
        ValueError: If ``result`` is not exactly five 'b'/'g'/'y' characters
            or ``guess`` is not five characters long.
    """
    result = result.lower()
    # Counting only the valid characters would accept strings such as
    # "gggggx"; check the length and the alphabet separately.
    if len(result) != 5 or any(mark not in "gby" for mark in result):
        raise ValueError(f"Invalid result {result}. "
                         f"It must contain exactly five letters and only the letters 'b', 'g' or 'y'.")
    if len(guess) != 5:
        raise ValueError(f"Invalid guess {guess}. "
                         f"It must contain exactly five letters.")
    # Candidate words are stored lower-case, so normalise the guess as well.
    guess = guess.lower()

    in_position = {}
    not_in_position = {}
    black_letters = []
    for position, (guess_letter, result_letter) in enumerate(zip(guess, result)):
        if result_letter == 'g':
            in_position[guess_letter] = position
        elif result_letter == 'y':
            not_in_position[guess_letter] = position
        else:
            black_letters.append(guess_letter)

    # With a repeated letter, one copy can be black while another is
    # green/yellow; that letter is in the answer and must not be excluded.
    not_in_word = [letter for letter in black_letters
                   if letter not in in_position and letter not in not_in_position]
    return in_position, not_in_position, not_in_word


def build_portuguese_word_bank(words_list):
    """Build the set of five-letter candidate words from a Portuguese corpus.

    Every five-character token is lower-cased and its accented letters are
    replaced by their unaccented counterparts.  Tokens that still contain
    anything other than the letters a-z (digits, punctuation, the ordinal
    markers 'ª'/'º', ...) are dropped: they are not playable words, and the
    letter-frequency table used for scoring only covers a-z.

    Args:
        words_list: Iterable of corpus tokens (strings).

    Returns:
        set of normalised five-letter words made only of the letters a-z.
    """
    translation_table = str.maketrans({"à": 'a', "á": "a", "â": "a", "ã": "a", "ä": "a",
                                       "é": "e", "ê": "e", "ë": "e", "è": "e",
                                       "í": "i", "î": "i", "ï": "i", "ì": "i",
                                       "ó": "o", "ô": "o", "õ": "o", "ò": "o", "ö": "o",
                                       "ú": "u", "û": "u", "ü": "u", "ù": "u",
                                       "ç": "c",
                                       "ñ": "n"})
    word_bank = set()
    for word in words_list:
        if len(word) != 5:
            continue
        normalized = word.lower().translate(translation_table)
        if len(normalized) == 5 and all("a" <= char <= "z" for char in normalized):
            word_bank.add(normalized)
    return word_bank


def retrieve_english_resources():
    """Return the word list of the NLTK ``words`` corpus.

    If reading the corpus fails, the corpus is downloaded with
    ``nltk.download("words")`` and the read is attempted once more.
    """
    from nltk.corpus import words as english_corpus

    try:
        vocabulary = english_corpus.words()
    except Exception:
        # Most likely the corpus is not installed yet: fetch it and retry.
        import nltk

        nltk.download("words")
        vocabulary = english_corpus.words()
    return vocabulary


def retrieve_portuguese_resources():
    """Return the tokens of the NLTK ``machado`` corpus.

    If reading the corpus fails, the corpus is downloaded with
    ``nltk.download("machado")`` and the read is attempted once more.
    """
    from nltk.corpus import machado as portuguese_corpus

    try:
        vocabulary = portuguese_corpus.words()
    except Exception:
        # Most likely the corpus is not installed yet: fetch it and retry.
        import nltk

        nltk.download("machado")
        vocabulary = portuguese_corpus.words()
    return vocabulary


def new_game(language="english"):
    """Run an interactive solving session on standard input/output.

    Loads the word bank for ``language``, prints the best opening guesses and
    then repeatedly reads "<guess> <result>" lines, narrowing the candidate
    list after each valid line until the result is all green.

    Args:
        language: "english" or "portuguese".

    Raises:
        SystemExit: If ``language`` is not supported, or if no candidate word
            is left after applying the feedback.
    """
    if language == "english":
        words_list = retrieve_english_resources()
        # Lower-casing can make distinct corpus entries collide;
        # dict.fromkeys drops the duplicates while keeping corpus order.
        updated_words_list = list(dict.fromkeys(
            word.lower() for word in words_list if len(word) == 5))
    elif language == "portuguese":
        words_list = retrieve_portuguese_resources()
        updated_words_list = build_portuguese_word_bank(words_list)
    else:
        print(f"Language {language} is not supported.")
        # Equivalent to exit(), without relying on the site-provided helper.
        raise SystemExit

    print_guesses_and_score(words_list=updated_words_list)
    count = 0
    solved = False
    while not solved:
        answer = input("""
        Type the attempted word and result.
        Result must be a five letters indicating the individual letters result:
         - b for black (letter not in the word);
         - y for yellow (letter in the word but not in this position);
         - g for green (letter in the word and in this position);
         """).split()
        if len(answer) != 2:
            # Unpacking a malformed line would raise and abort the game.
            print("An error occurred while processing your guess and result: "
                  "expected the word and the result separated by a space.")
            continue
        guess, result = answer
        try:
            in_position, not_in_position, not_in_word = process_guess_and_result(guess, result)
        except ValueError as e:
            print(f"An error occurred while processing your guess and result: {e}")
            continue

        updated_words_list = filter_words_list(updated_words_list, in_position, not_in_position, not_in_word)
        if len(updated_words_list) == 0:
            print("This word is not in my vocabulary D:")
            raise SystemExit(0)
        print_guesses_and_score(words_list=updated_words_list)
        count += 1
        # Only a line that passed validation may end the game.
        solved = result.lower() == "ggggg"

    if count < 7:
        print("VICTORY!")
    else:
        print("Looks like this algorithm can be improved :(")


def parse_args():
    """Parse the command line and return the namespace holding ``language``."""
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "language",
        choices=["english", "portuguese"],
        help="Language of the new game",
    )
    return cli.parse_args()


if __name__ == '__main__':
    # Script entry point: play a game in the language given on the command line.
    new_game(language=parse_args().language)
	from string import ascii_lowercase
	import argparse


	def letter_frequency_builder(words_list):
	    """Compute, for each lowercase ASCII letter, the share of words containing it.

	    A letter is counted at most once per word, so every value is the
	    fraction of entries in ``words_list`` in which the letter appears.

	    Args:
	        words_list: Sized collection of words (a list or a set of strings).

	    Returns:
	        dict mapping every letter of ``ascii_lowercase`` to its frequency.
	        For an empty ``words_list`` every frequency is ``0.0``.
	    """
	    corpus_length = len(words_list)
	    if corpus_length == 0:
	        # Nothing to count; avoid dividing by zero.
	        return {letter: 0.0 for letter in ascii_lowercase}

	    # Single pass over the corpus instead of one pass per letter.
	    counts = dict.fromkeys(ascii_lowercase, 0)
	    for word in words_list:
	        for letter in set(word):
	            if letter in counts:
	                counts[letter] += 1
	    return {letter: count / corpus_length for letter, count in counts.items()}


	def word_score(word, letter_frequency):
	    """Score a word by summing the frequency of each distinct letter in it.

	    Args:
	        word: The word to score.
	        letter_frequency: dict mapping a letter to its frequency.

	    Returns:
	        The sum of the frequencies of the distinct characters of ``word``.
	    """
	    # Characters absent from the table contribute nothing instead of
	    # raising KeyError.
	    return sum(letter_frequency.get(letter, 0) for letter in set(word))


	def best_n_guesses(n, words_list):
	    """Return the ``n`` highest-scoring words and the frequency table used.

	    Args:
	        n: Maximum number of words to return.
	        words_list: Collection of candidate words.

	    Returns:
	        Tuple ``(guesses, freq_dict)``. ``guesses`` is sorted by ascending
	        score (best guess last) and holds at most ``n`` words; it is empty
	        when ``n`` is not positive.
	    """
	    freq_dict = letter_frequency_builder(words_list)
	    ranked = sorted(words_list, key=lambda word: word_score(word, freq_dict))
	    # ``ranked[-n:]`` returns the whole list for n == 0, so non-positive n
	    # is handled explicitly.
	    top = ranked[-n:] if n > 0 else []
	    return top, freq_dict


	def print_guesses_and_score(words_list):
	    """Print up to ten suggested guesses, best first, with rank and score."""
	    guesses, freq_dict = best_n_guesses(10, words_list)
	    # best_n_guesses puts the best word last, so walk the list backwards.
	    for i, guess in enumerate(guesses[::-1]):
	        print(f"{i+1} {guess} - {word_score(guess, freq_dict)}")


	def filter_word(word, in_position, not_in_position, not_in_word):
	    """Tell whether ``word`` is still a possible answer given the feedback.

	    Args:
	        word: Candidate word.
	        in_position: dict ``letter -> index`` of green letters.
	        not_in_position: dict ``letter -> index`` of yellow letters (in the
	            answer, but not at that index).
	        not_in_word: Iterable of letters reported as absent (black).

	    Returns:
	        True when ``word`` satisfies every constraint, False otherwise.
	    """
	    for letter, position in in_position.items():
	        if not word[position] == letter:
	            return False

	    for letter, position in not_in_position.items():
	        if word[position] == letter:
	            return False

	    letters_set = set(word)
	    in_word = set(not_in_position)
	    # A repeated letter can be black for one copy and green/yellow for
	    # another; it is in the answer, so it must not be forbidden.
	    forbidden = set(not_in_word) - in_word - set(in_position)
	    return in_word <= letters_set and letters_set.isdisjoint(forbidden)


	def filter_words_list(words_list, in_position, not_in_position, not_in_word):
	    """Return the set of words from ``words_list`` that pass ``filter_word``."""
	    return {word for word in words_list
	            if filter_word(word, in_position, not_in_position, not_in_word)}


	def process_guess_and_result(guess, result):
	    """Translate a guess and its colour feedback into filtering constraints.

	    Args:
	        guess: The five-letter word that was tried (case-insensitive).
	        result: Five characters, one per letter of ``guess``: 'g' (green),
	            'y' (yellow) or 'b' (black); case-insensitive.

	    Returns:
	        Tuple ``(in_position, not_in_position, not_in_word)``: a dict of
	        green letters to their index, a dict of yellow letters to their
	        index, and the list of black letters that are not also green or
	        yellow elsewhere in the guess.  The dicts are keyed by letter, so a
	        letter repeated with the same colour keeps only its last index.

	    Raises:
	        ValueError: If ``result`` is not exactly five 'b'/'g'/'y'
	            characters or ``guess`` is not five characters long.
	    """
	    result = result.lower()
	    # Counting only the valid characters would accept "gggggx"; check the
	    # length and the alphabet separately.
	    if len(result) != 5 or any(mark not in "gby" for mark in result):
	        raise ValueError(f"Invalid result {result}. "
	                         f"It must contain exactly five letters and only the letters 'b', 'g' or 'y'.")
	    if len(guess) != 5:
	        raise ValueError(f"Invalid guess {guess}. "
	                         f"It must contain exactly five letters.")
	    # Candidate words are stored lower-case, so normalise the guess as well.
	    guess = guess.lower()

	    in_position = {}
	    not_in_position = {}
	    black_letters = []
	    for position, (guess_letter, result_letter) in enumerate(zip(guess, result)):
	        if result_letter == 'g':
	            in_position[guess_letter] = position
	        elif result_letter == 'y':
	            not_in_position[guess_letter] = position
	        else:
	            black_letters.append(guess_letter)

	    # A letter that is black for one copy but green/yellow for another is
	    # in the answer and must not be excluded.
	    not_in_word = [letter for letter in black_letters
	                   if letter not in in_position and letter not in not_in_position]
	    return in_position, not_in_position, not_in_word


	def build_portuguese_word_bank(words_list):
	    """Build the set of five-letter candidate words from a Portuguese corpus.

	    Every five-character token is lower-cased and its accented letters are
	    replaced by their unaccented counterparts.  Tokens that still contain
	    anything other than the letters a-z (digits, punctuation, the ordinal
	    markers, ...) are dropped.

	    Args:
	        words_list: Iterable of corpus tokens (strings).

	    Returns:
	        set of normalised five-letter words made only of the letters a-z.
	    """
	    translation_table = str.maketrans({"à": 'a', "á": "a", "â": "a", "ã": "a", "ä": "a",
	                                       "é": "e", "ê": "e", "ë": "e", "è": "e",
	                                       "í": "i", "î": "i", "ï": "i", "ì": "i",
	                                       "ó": "o", "ô": "o", "õ": "o", "ò": "o", "ö": "o",
	                                       "ú": "u", "û": "u", "ü": "u", "ù": "u",
	                                       "ç": "c",
	                                       "ñ": "n"})
	    word_bank = set()
	    for word in words_list:
	        if len(word) != 5:
	            continue
	        normalized = word.lower().translate(translation_table)
	        if len(normalized) == 5 and all("a" <= char <= "z" for char in normalized):
	            word_bank.add(normalized)
	    return word_bank


	def retrieve_english_resources():
	    """Return the word list of the NLTK ``words`` corpus.

	    If reading the corpus fails, the corpus is downloaded with
	    ``nltk.download("words")`` and the read is attempted once more.
	    """
	    from nltk.corpus import words
	    try:
	        return words.words()
	    except Exception:
	        # Most likely the corpus is not installed yet: fetch it and retry.
	        import nltk
	        nltk.download("words")
	        return words.words()


	def retrieve_portuguese_resources():
	    """Return the tokens of the NLTK ``machado`` corpus.

	    If reading the corpus fails, the corpus is downloaded with
	    ``nltk.download("machado")`` and the read is attempted once more.
	    """
	    from nltk.corpus import machado
	    try:
	        return machado.words()
	    except Exception:
	        # Most likely the corpus is not installed yet: fetch it and retry.
	        import nltk
	        nltk.download("machado")
	        return machado.words()


	def new_game(language="english"):
	    """Run an interactive solving session on standard input/output.

	    Loads the word bank for ``language``, prints the best opening guesses
	    and then repeatedly reads "<guess> <result>" lines, narrowing the
	    candidate list after each valid line until the result is all green.

	    Args:
	        language: "english" or "portuguese".

	    Raises:
	        SystemExit: If ``language`` is not supported, or if no candidate
	            word is left after applying the feedback.
	    """
	    if language == "english":
	        words_list = retrieve_english_resources()
	        # Lower-casing can make distinct corpus entries collide;
	        # dict.fromkeys drops the duplicates while keeping corpus order.
	        updated_words_list = list(dict.fromkeys(
	            word.lower() for word in words_list if len(word) == 5))
	    elif language == "portuguese":
	        words_list = retrieve_portuguese_resources()
	        updated_words_list = build_portuguese_word_bank(words_list)
	    else:
	        print(f"Language {language} is not supported.")
	        # Equivalent to exit(), without relying on the site-provided helper.
	        raise SystemExit

	    print_guesses_and_score(words_list=updated_words_list)
	    count = 0
	    solved = False
	    while not solved:
	        answer = input("""
	Type the attempted word and result.
	Result must be a five letters indicating the individual letters result:
	- b for black (letter not in the word);
	- y for yellow (letter in the word but not in this position);
	- g for green (letter in the word and in this position);
	""").split()
	        if len(answer) != 2:
	            # Unpacking a malformed line would raise and abort the game.
	            print("An error occurred while processing your guess and result: "
	                  "expected the word and the result separated by a space.")
	            continue
	        guess, result = answer
	        try:
	            in_position, not_in_position, not_in_word = process_guess_and_result(guess, result)
	        except ValueError as e:
	            print(f"An error occurred while processing your guess and result: {e}")
	            continue

	        updated_words_list = filter_words_list(updated_words_list, in_position, not_in_position, not_in_word)
	        if len(updated_words_list) == 0:
	            print("This word is not in my vocabulary D:")
	            raise SystemExit(0)
	        print_guesses_and_score(words_list=updated_words_list)
	        count += 1
	        # Only a line that passed validation may end the game.
	        solved = result.lower() == "ggggg"

	    if count < 7:
	        print("VICTORY!")
	    else:
	        print("Looks like this algorithm can be improved :(")


	def parse_args():
	    """Parse the command line and return the namespace holding ``language``."""
	    parser = argparse.ArgumentParser()
	    parser.add_argument("language", help="Language of the new game", choices=["english", "portuguese"])

	    return parser.parse_args()


	if __name__ == '__main__':
	    # Script entry point: play a game in the language given on the command line.
	    args = parse_args()
	    new_game(language=args.language)