Last active
November 29, 2022 18:39
-
-
Save rindPHI/75bc6afbe8e767a019fb3185382f9bcc to your computer and use it in GitHub Desktop.
Wordle Solver Based on Z3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# MIT License | |
# | |
# Copyright (c) 2022 Dominic Steinhöfel | |
# | |
# Permission is hereby granted, free of charge, to any person obtaining a copy | |
# of this software and associated documentation files (the "Software"), to deal | |
# in the Software without restriction, including without limitation the rights | |
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
# copies of the Software, and to permit persons to whom the Software is | |
# furnished to do so, subject to the following conditions: | |
# | |
# The above copyright notice and this permission notice shall be included in all | |
# copies or substantial portions of the Software. | |
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
import operator | |
import random | |
import urllib.request | |
from functools import reduce | |
from math import prod | |
from typing import Tuple, List, cast, Dict | |
import z3 | |
FREQUENCIES = { | |
'E': 10.693858415377402, | |
'S': 10.68917018284107, | |
'A': 8.55602437880919, | |
'R': 6.774496015002344, | |
'O': 6.277543366150961, | |
'L': 5.794655414908579, | |
'I': 5.6071261134552275, | |
'T': 5.53680262541022, | |
'N': 4.669479606188467, | |
'D': 4.275668073136427, | |
'U': 3.4974214721050165, | |
'C': 3.3286451007969995, | |
'P': 3.2676980778246603, | |
'M': 2.8176277543366153, | |
'Y': 2.7379278012189405, | |
'H': 2.723863103609939, | |
'B': 2.414439756211908, | |
'G': 2.3253633380215657, | |
'K': 1.9549929676511955, | |
'F': 1.9362400375058604, | |
'W': 1.7346460384435067, | |
'V': 1.1439287388654478, | |
'X': 0.4078762306610408, | |
'Z': 0.33755274261603374, | |
'J': 0.30942334739803096, | |
'Q': 0.1875293014533521 | |
} | |
def solve_wordle( | |
correct_choices: str = "", | |
wrong_characters: str = "", | |
wrong_positions: Tuple[str, str, str, str, str] = ("", "", "", "", "")) -> str: | |
if not correct_choices and not wrong_characters and all(not wp for wp in wrong_positions): | |
return get_first_word() | |
if wrong_characters and not correct_choices and all(not wp for wp in wrong_positions): | |
return get_first_word(wrong_characters) | |
correct_choices = [c if c.strip() else None for c in correct_choices.ljust(5)] | |
wrong_characters = [c for c in wrong_characters] | |
# Five int variables, one for each characters. Representing char points. | |
xs = z3.Ints([f"x_{i + 1}" for i in range(5)]) | |
solver = z3.Optimize() | |
# Constrain the variables to the char points of A to Z. | |
add(solver, *[z3.Or(*[xs[i] == z3.IntVal(val) for val in range(ord("A"), ord("Z"))]) for i in range(5)]) | |
# No variable may attain the value of a forbidden character. | |
add(solver, z3.And(*[z3.Not(xs[i] == z3.IntVal(ord(char))) for i in range(5) for char in wrong_characters])) | |
# Respect correct choices: Uniqueness constraints on the respective variables. | |
add(solver, z3.And(*[xs[i] == z3.IntVal(ord(correct_choices[i])) for i in range(5) if correct_choices[i]])) | |
# Wrong positions: | |
# Exclude the characters at the signalled wrong positions. | |
add(solver, *[z3.Not(xs[i] == z3.IntVal(ord(c))) for i in range(5) for c in wrong_positions[i]]) | |
# But: At any other position, these characters must occur! | |
add(solver, *[z3.Or(*[xs[k] == z3.IntVal(ord(c)) for k in range(5) if k != i]) | |
for i in range(5) for c in wrong_positions[i]]) | |
# Solution must be contained in the list of valid five character words. | |
words = get_five_character_words() | |
add(solver, z3.Or(*[z3.And(*[xs[i] == z3.IntVal(ord(word[i])) for i in range(5)]) for word in words])) | |
# Optimization: Minimize the probability of repeated characters, to maximize value of feedback | |
solver.minimize(reduce( | |
operator.add, | |
[z3.If(xs[i] == xs[j], z3.IntVal(1), z3.IntVal(0)) for i in range(5) for j in range(i + 1, 5)])) | |
# Optimization: Choose solution with the most frequent characters. | |
# First, define a "frequency" function, and encode the FREQUENCIES dictionary in it. | |
f = z3.Function("frequency", z3.IntSort(), z3.RealSort()) | |
add(solver, *[cast(z3.BoolRef, f(z3.IntVal(i)) == z3.RealVal(FREQUENCIES[chr(i)])) | |
for i in range(ord("A"), ord("Z") + 1)]) | |
# The actual optimization: Maximize the frequencies of all characters. | |
solver.maximize(f(xs[0]) * f(xs[1]) * f(xs[2]) * f(xs[3]) * f(xs[4])) | |
assert solver.check() == z3.sat | |
return "".join([chr(solver.model().get_interp(xs[idx]).as_long()) for idx in range(5)]) | |
def add(solver, *formulas: z3.BoolRef): | |
for formula in formulas: | |
solver.add(formula) | |
def get_first_word(wrong_characters: str = "") -> str: | |
"""Returns a word consisting of five different characters. With higher probability, returns a word consisting | |
of characters that occur frequently in the word set we use. If `wrong_characters` is not empty, those characters | |
won't occur in the returned word (if this is possible). Otherwise, an IndexError is raised.""" | |
options = get_start_words() | |
if wrong_characters: | |
options = [word for word in options if all(word[i] != e for i in range(5) for e in wrong_characters)] | |
return random.choices(options, weights=list(reversed([i * i * i for i in range(len(options))])), k=1)[0] | |
def get_start_words() -> List[str]: | |
"""Returns a list of words consisting of five different characters, sorted by accumulated frequency | |
of the contained characters in the whole set of five character words we use.""" | |
result = [word for word in get_five_character_words() | |
if all(word[i] != word[j] for i in range(5) for j in range(5) if i != j)] | |
key = lambda word: prod(FREQUENCIES[word[i]] * 100 for i in range(5)) ** 0.2 | |
return list(reversed(sorted(result, key=key))) | |
def get_five_character_words() -> List[str]: | |
"""Returns a list of five-character words in caps from a publicly available website""" | |
uf = urllib.request.urlopen("http://www.mieliestronk.com/corncob_caps.txt") | |
lines = uf.read().decode("utf-8").split("\r\n") | |
return [line for line in lines if len(line) == 5] | |
def calculate_frequencies() -> Dict[str, float]: | |
"""Calculates the frequencies of all 26 alphabetic characters in the word set we use.""" | |
words = get_five_character_words() | |
counting_table: Dict[str, int] = {} | |
for word in words: | |
for letter in word: | |
counting_table.setdefault(letter, 0) | |
counting_table[letter] += 1 | |
result: Dict[str, float] = { | |
letter: 100 * counting_table[letter] / (sum(counting_table.values())) | |
for letter in counting_table} | |
return result | |
if __name__ == '__main__': | |
# Wordle 216 | |
# print(solve_wordle(wrong_characters="", correct_choices="", wrong_positions=("", "", "", "", ""))) | |
# -> SMEAR | |
# print(solve_wordle(wrong_characters="SMEA", correct_choices="", wrong_positions=("", "", "", "", "R"))) | |
# -> INTRO | |
# print(solve_wordle(wrong_characters="SMEANTO", correct_choices="", wrong_positions=("I", "", "", "R", "R"))) | |
# -> LURID | |
# print(solve_wordle(wrong_characters="SMEANTOLUD", correct_choices="", wrong_positions=("R", "", "R", "RI", "R"))) | |
# -> PRICY | |
print(solve_wordle(wrong_characters="SMEANTOLUDY", correct_choices="PRIC", wrong_positions=("R", "", "R", "RI", "R"))) | |
# -> PRICK (-> solution) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment