# nvanderw/wordle-optimizer.py

## wordle-optimizer.py
# For finding the best wordle starting word

import argparse
import itertools
import math
import random

from dataclasses import dataclass
from multiprocessing import Pool
from typing import Any

def read_lines(path):
    """Yield every line of the file at `path` with trailing whitespace removed.

    The file is opened in text mode without an explicit encoding and is read
    lazily, one line per iteration. Blank lines are yielded as empty strings.
    """
    with open(path, "r") as handle:
        for raw_line in handle:
            yield raw_line.rstrip()

def is_valid_wordle_word(word):
    """Return True when `word` has exactly five characters, all lowercase.

    Every character must individually satisfy `str.islower()`, so digits,
    punctuation, whitespace and uppercase letters all disqualify the word.
    """
    if len(word) != 5:
        return False
    for letter in word:
        if not letter.islower():
            return False
    return True

def sample_with_replacement(list, num_samples):
    """Yield `num_samples` elements picked at random from `list`.

    Every draw is independent of the previous ones, so the same element can
    be yielded more than once. Drawing from an empty sequence raises
    ValueError (from `random.randrange`) when the first element is requested.
    """
    # The parameter name shadows the builtin `list`; it is kept as-is so that
    # callers passing it by keyword keep working.
    population = list
    for _ in range(num_samples):
        index = random.randrange(0, len(population))
        yield population[index]

def filter_from_answer(answer, guess, words):
    """Return the words still possible after guessing `guess` against `answer`.

    Each position of `guess` is checked on its own, in order:

    * letter equals the answer's letter at that index: keep words that have
      the letter at that index;
    * letter occurs somewhere else in the answer: keep words that contain the
      letter, but not at that index;
    * letter does not occur in the answer: keep words without the letter.

    Letter counts are not tracked, so a repeated letter in the guess is judged
    one occurrence at a time. When `guess` is empty, `words` is returned
    unchanged.
    """
    survivors = words
    for position, letter in enumerate(guess):
        if answer[position] == letter:
            # Right letter in the right position.
            survivors = [w for w in survivors if w[position] == letter]
        elif letter not in answer:
            # Letter is absent from the answer altogether.
            survivors = [w for w in survivors if letter not in w]
        else:
            # Letter is in the answer, but at a different position.
            survivors = [w for w in survivors if w[position] != letter and letter in w]
    return survivors

def estimate_entropy(context, candidate_word):
    """Estimate the average information gained by guessing `candidate_word`.

    For every sampled answer in `context.samples`, the word list in
    `context.wordle_words` is narrowed with `filter_from_answer`, and the gain
    is measured as log2(words before) - log2(words after).

    Returns a `(candidate_word, mean_gain)` tuple. Raises ZeroDivisionError
    when there are no samples and ValueError when the word list is empty.
    """
    sample_count = len(context.samples)
    baseline_bits = math.log2(len(context.wordle_words))
    total_gain = 0
    for hidden_answer in context.samples:
        survivors = filter_from_answer(hidden_answer, candidate_word, context.wordle_words)
        total_gain += baseline_bits - math.log2(len(survivors))

    return (candidate_word, total_gain / sample_count)

# Module-level placeholder; the script entry point below rebinds this name to
# the list of (word, average entropy) tuples returned by the worker pool.
results = []

# The main reason to put all this in the context object is so that multiprocessing can
# serialize/send it to the child processes, so that they can pass it along to estimate_entropy.
# I tried passing pool.map a function which was closed over these values, and it failed because
# a function closed over local values cannot be serialized.
@dataclass
class Context:
    """Inputs handed to every estimate_entropy call."""
    # Every valid dictionary word; the pool that each guess narrows down.
    wordle_words: Any
    # Randomly drawn words used as hypothetical answers when scoring a guess.
    samples: Any

if __name__ == "__main__":
    # Script entry point: score every dictionary word by its estimated entropy
    # gain and print the best ones as CSV on stdout.
    parser = argparse.ArgumentParser(description="Find the best wordle starting words")
    parser.add_argument(
        '-p',
        '--parallel',
        metavar="N",
        type=int,
        default=None,
        help="Number of worker processes to spawn. Defaults to OS cores detected.")
    parser.add_argument(
        '-d',
        '--dictionary',
        metavar="path",
        type=str,
        required=True,
        help="Path to dictionary (required).")
    parser.add_argument(
        '-s',
        '--numsamples',
        metavar="N",
        type=int,
        default=100,
        help="Number of random samples to test each word against. More samples=more accuracy/more compute.")
    parser.add_argument(
        '-w',
        '--numwords',
        metavar="N",
        type=int,
        default=10,
        help="Show top N words.")
    args = parser.parse_args()

    # Reject values that would otherwise only fail deep inside the worker
    # processes (division by zero) or inside Pool itself.
    if args.numsamples < 1:
        parser.error("--numsamples must be at least 1")
    if args.parallel is not None and args.parallel < 1:
        parser.error("--parallel must be at least 1")

    dictionary_path = args.dictionary
    wordle_words = [word for word in read_lines(dictionary_path) if is_valid_wordle_word(word)]
    if not wordle_words:
        # Without this check the run dies later with an opaque math/random error.
        parser.error(f"no valid five-letter lowercase words found in {dictionary_path}")

    # Basic algorithm: for each wordle word, estimate the average amount of entropy gained by picking it.
    samples = list(sample_with_replacement(wordle_words, args.numsamples))

    context = Context(wordle_words, samples)

    # Each task is a (context, candidate) pair, matching estimate_entropy's signature.
    tasks = [(context, candidate) for candidate in wordle_words]
    with Pool(args.parallel) as pool:
        results = pool.starmap(estimate_entropy, tasks)

    # Highest average gain first; ties keep dictionary order (sort is stable).
    results.sort(key=lambda scored: scored[1], reverse=True)

    print("word,average entropy")
    for result in results[0:args.numwords]:
        print(f"{result[0]},{result[1]}")
	# For finding the best wordle starting word

	import argparse
	import itertools
	import math
	import random

	from dataclasses import dataclass
	from multiprocessing import Pool
	from typing import Any

	def read_lines(path):
	    """Yield every line of the file at `path` with trailing whitespace removed.

	    The file is opened in text mode without an explicit encoding and is read
	    lazily, one line per iteration. Blank lines are yielded as empty strings.
	    """
	    with open(path, "r") as handle:
	        for raw_line in handle:
	            yield raw_line.rstrip()

	def is_valid_wordle_word(word):
	    """Return True when `word` has exactly five characters, all lowercase.

	    Every character must individually satisfy `str.islower()`, so digits,
	    punctuation, whitespace and uppercase letters all disqualify the word.
	    """
	    return len(word) == 5 and all(c.islower() for c in word)

	def sample_with_replacement(list, num_samples):
	    """Yield `num_samples` elements picked at random from `list`.

	    Every draw is independent of the previous ones, so the same element can
	    be yielded more than once. Drawing from an empty sequence raises
	    ValueError (from `random.randrange`) when the first element is requested.
	    """
	    # The parameter name shadows the builtin `list`; it is kept as-is so that
	    # callers passing it by keyword keep working.
	    for _ in range(num_samples):
	        yield list[random.randrange(0, len(list))]

	def filter_from_answer(answer, guess, words):
	    """Return the words still possible after guessing `guess` against `answer`.

	    Each position of `guess` is checked on its own, in order:

	    * letter equals the answer's letter at that index: keep words that have
	      the letter at that index;
	    * letter occurs somewhere else in the answer: keep words that contain the
	      letter, but not at that index;
	    * letter does not occur in the answer: keep words without the letter.

	    Letter counts are not tracked, so a repeated letter in the guess is judged
	    one occurrence at a time. When `guess` is empty, `words` is returned
	    unchanged.
	    """
	    for (i, c) in enumerate(guess):
	        if answer[i] == c:
	            # Right letter in right position
	            words = [word for word in words if word[i] == c]
	        elif c in answer:
	            # Right letter, wrong position
	            words = [word for word in words if word[i] != c and c in word]
	        else:
	            # Letter does not appear in the answer
	            words = [word for word in words if c not in word]
	    return words

	def estimate_entropy(context, candidate_word):
	    """Estimate the average information gained by guessing `candidate_word`.

	    For every sampled answer in `context.samples`, the word list in
	    `context.wordle_words` is narrowed with `filter_from_answer`, and the gain
	    is measured as log2(words before) - log2(words after).

	    Returns a `(candidate_word, mean_gain)` tuple. Raises ZeroDivisionError
	    when there are no samples and ValueError when the word list is empty.
	    """
	    num_samples = len(context.samples)
	    entropy_before = math.log2(len(context.wordle_words))
	    # Named `total_gain` rather than `sum` so the builtin is not shadowed.
	    total_gain = 0
	    for sample in context.samples:
	        remaining = filter_from_answer(sample, candidate_word, context.wordle_words)
	        total_gain += entropy_before - math.log2(len(remaining))

	    return (candidate_word, total_gain / num_samples)

	# Placeholder for the list of (word, average entropy) tuples; the script
	# entry point further down rebinds this name with the worker pool's output.
	results = []

	# The main reason to put all this in the context object is so that multiprocessing can
	# serialize/send it to the child processes, so that they can pass it along to estimate_entropy.
	# I tried passing pool.map a function which was closed over these values, and it failed because
	# a function closed over local values cannot be serialized.
	@dataclass
	class Context:
	    """Inputs handed to every estimate_entropy call."""
	    # Every valid dictionary word; the pool that each guess narrows down.
	    wordle_words: Any
	    # Randomly drawn words used as hypothetical answers when scoring a guess.
	    samples: Any

	if __name__ == "__main__":
	    # Script entry point: score every dictionary word by its estimated entropy
	    # gain and print the best ones as CSV on stdout.
	    parser = argparse.ArgumentParser(description="Find the best wordle starting words")
	    parser.add_argument(
	        '-p',
	        '--parallel',
	        metavar="N",
	        type=int,
	        default=None,
	        help="Number of worker processes to spawn. Defaults to OS cores detected.")
	    parser.add_argument(
	        '-d',
	        '--dictionary',
	        metavar="path",
	        type=str,
	        required=True,
	        help="Path to dictionary (required).")
	    parser.add_argument(
	        '-s',
	        '--numsamples',
	        metavar="N",
	        type=int,
	        default=100,
	        help="Number of random samples to test each word against. More samples=more accuracy/more compute.")
	    parser.add_argument(
	        '-w',
	        '--numwords',
	        metavar="N",
	        type=int,
	        default=10,
	        help="Show top N words.")
	    args = parser.parse_args()

	    # Reject values that would otherwise only fail deep inside the worker
	    # processes (division by zero) or inside Pool itself.
	    if args.numsamples < 1:
	        parser.error("--numsamples must be at least 1")
	    if args.parallel is not None and args.parallel < 1:
	        parser.error("--parallel must be at least 1")

	    dictionary_path = args.dictionary
	    wordle_words = [word for word in read_lines(dictionary_path) if is_valid_wordle_word(word)]
	    if not wordle_words:
	        # Without this check the run dies later with an opaque math/random error.
	        parser.error(f"no valid five-letter lowercase words found in {dictionary_path}")

	    # Basic algorithm: for each wordle word, estimate the average amount of entropy gained by picking it.
	    samples = list(sample_with_replacement(wordle_words, args.numsamples))

	    context = Context(wordle_words, samples)

	    # Each task is a (context, candidate) pair, matching estimate_entropy's signature.
	    tasks = [(context, candidate) for candidate in wordle_words]
	    with Pool(args.parallel) as pool:
	        results = pool.starmap(estimate_entropy, tasks)

	    # Highest average gain first; ties keep dictionary order (sort is stable).
	    results.sort(key=lambda scored: scored[1], reverse=True)

	    print("word,average entropy")
	    for result in results[0:args.numwords]:
	        print(f"{result[0]},{result[1]}")