Created
February 8, 2022 00:02
-
-
Save twjang/5d6118f80a1884d3759640a128d43c46 to your computer and use it in GitHub Desktop.
Wordle word propose
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
### Wordle word suggestion: ranks candidate words by letter frequency and narrows the list from per-guess feedback
import math | |
import re | |
import os | |
import sys | |
import pprint | |
import requests | |
class WordleSolve:
    """Candidate-word tracker for Wordle-style games.

    Holds the list of words of a fixed length that are still possible,
    ranks them by how common their letters are among the remaining
    candidates, and shrinks the list from the feedback of each guess.
    """

    # Path of the local word list; it is downloaded on first use when missing.
    worddict = './words.txt'

    def __init__(self, length):
        """Load the word list and keep only the words with ``length`` letters.

        When ``worddict`` does not exist it is downloaded first. The response
        is fetched and checked *before* the file is created, so a failed
        download never leaves an empty or bogus word list behind.

        Raises:
            requests.RequestException: if the download fails or returns an
                HTTP error status.
        """
        if not os.path.exists(self.worddict):
            print(f'[!] dict not found at {self.worddict}.. downloading..')
            resp = requests.get(
                'https://github.com/dwyl/english-words/raw/master/words_alpha.txt',
                timeout=30,
            )
            resp.raise_for_status()
            with open(self.worddict, 'wb') as f:
                f.write(resp.content)
            print('[+] download done')
        with open(self.worddict) as f:
            words = [w.strip() for w in f]
        self.words = [w for w in words if len(w) == length]
        self.length = length

    def letter_logfreq(self):
        """Return ``{letter: -log(share)}`` over all letters of ``self.words``.

        ``share`` is the letter's fraction of every letter occurrence in the
        remaining candidates, so common letters get small values. Returns an
        empty dict when there are no letters to count.
        """
        freq = {}
        for w in self.words:
            for c in w:
                freq[c] = freq.get(c, 0) + 1
        n_letters = sum(freq.values())
        return {c: -math.log(n / n_letters) for c, n in freq.items()}

    def sort_by_score(self):
        """Return ``(score, word)`` pairs for all candidates, best first.

        A word's score is the sum of ``letter_logfreq`` over its *distinct*
        letters; every repeated slot is charged the worst (largest) letter
        cost, so words with repeated letters rank lower. Lower is better.
        Returns an empty list when no candidates remain.
        """
        logfreq = self.letter_logfreq()
        # default= keeps an exhausted candidate list from raising ValueError.
        max_loss = max(logfreq.values(), default=0.0)
        pairs = []
        for w in self.words:
            letters = set(w)
            score = 0.0
            for c in letters:
                score += logfreq[c]
            pairs.append((score + (self.length - len(letters)) * max_loss, w))
        pairs.sort()
        return pairs

    def filter_by_res(self, inputs: str, res: str) -> None:
        """Keep only the words consistent with the feedback for one guess.

        Args:
            inputs: the guessed word.
            res: one character per guessed letter: ``'0'`` not matched
                (gray), ``'1'`` yellow, ``'2'`` green. Any other character
                is handled like ``'1'``.

        Raises:
            ValueError: if ``inputs`` and ``res`` differ in length or are
                longer than the word length.
        """
        if len(inputs) != len(res):
            raise ValueError('guess and result must have the same length')
        if len(inputs) > self.length:
            raise ValueError('guess is longer than the word length')

        marks = list(zip(inputs, res))

        # Green letters must sit at exactly their guessed position.
        greens = [(i, c) for i, (c, r) in enumerate(marks) if r == '2']
        regex = ''.join([(c if r == '2' else '.') for c, r in marks])
        print(f'to include: {regex}')

        # A yellow or gray letter can never be at the position it was tried.
        misplaced = [(i, c) for i, (c, r) in enumerate(marks) if r != '2']
        for i, c in misplaced:
            regex = '.' * i + c + '.' * (self.length - i - 1)
            print(f'to exclude: {regex}')

        # Per-letter counts: every coloured mark proves one more copy of the
        # letter; a gray mark for the same letter proves there are no further
        # copies, which turns the lower bound into an exact count.
        min_count = {}
        capped = set()
        for c, r in marks:
            if r == '0':
                capped.add(c)
                min_count.setdefault(c, 0)
            else:
                min_count[c] = min_count.get(c, 0) + 1

        absent = [c for c, need in min_count.items() if need == 0]
        print(f'exclude chars: {absent}')

        def consistent(w):
            # Slices instead of indexing so shorter words simply fail to match.
            if any(w[i:i + 1] != c for i, c in greens):
                return False
            if any(w[i:i + 1] == c for i, c in misplaced):
                return False
            for c, need in min_count.items():
                have = w.count(c)
                if have < need or (c in capped and have != need):
                    return False
            return True

        self.words = [w for w in self.words if consistent(w)]
# Interactive loop: print the best suggestions, read "<guess> <result>" from
# stdin, narrow the candidates, repeat. The optional first command-line
# argument sets the word length (default 5).
if __name__ == '__main__':
    wordlen = 5
    if len(sys.argv) >= 2:
        wordlen = int(sys.argv[1])
    print(f'Using word length: {wordlen}')
    s = WordleSolve(wordlen)
    while True:
        # Stop before scoring once the feedback has ruled out every word.
        if not s.words:
            print('--> No candidate words left.')
            break
        print('--> Suggestions: ')
        for score, word in s.sort_by_score()[:20]:
            print(f' {word}: {score:.3f}')
        inputs = None
        while inputs is None:
            sys.stdout.write('> ')
            sys.stdout.flush()
            line = sys.stdin.readline()
            if not line:
                # readline() returns '' only at EOF; without this check the
                # prompt would print the format error forever.
                print()
                sys.exit(0)
            fields = line.split()
            if len(fields) != 2 or len(fields[0]) != len(fields[1]):
                print('Error: input format should be "[your answer] [result]"')
                continue
            if len(fields[0]) != wordlen:
                print(f'Error: answer must have {wordlen} letters')
                continue
            if any(r not in '012' for r in fields[1]):
                print('Error: result may only contain 0 (gray), 1 (yellow), 2 (green)')
                continue
            inputs = fields
        s.filter_by_res(inputs[0], inputs[1])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment