twjang/wordle.py

## wordle.py
#!/usr/bin/env python3

### Wordle word suggestion

import math
import re
import os
import sys
import pprint
import requests

class WordleSolve:
    """Rank and prune candidate words for a Wordle-style game.

    ``self.words`` holds the dictionary words of the requested length that
    are still possible; ``sort_by_score`` ranks them and ``filter_by_res``
    removes the ones contradicted by the feedback for a guess.
    """

    # Local word-list file; fetched on first use when it does not exist.
    worddict = './words.txt'

    def __init__(self, length):
        """Load the word list and keep the words that are ``length`` long.

        When ``worddict`` is missing it is downloaded first.

        Args:
            length: Number of letters a candidate word must have.

        Raises:
            requests.HTTPError: If the download answers with an error status.
        """
        if not os.path.exists(self.worddict):
            print(f'[!] dict not found at {self.worddict}.. downloading..')
            # Fetch and validate BEFORE creating the file: otherwise a failed
            # request leaves an empty/bogus words.txt behind that every later
            # run would silently accept as the dictionary.
            resp = requests.get(
                'https://github.com/dwyl/english-words/raw/master/words_alpha.txt',
                timeout=60,
            )
            resp.raise_for_status()
            with open(self.worddict, 'wb') as f:
                f.write(resp.content)
            print('[+] download done')

        with open(self.worddict) as f:
            words = [line.strip() for line in f]
        self.words = [w for w in words if len(w) == length]
        self.length = length

    def letter_logfreq(self):
        """Return ``{letter: -log(share)}`` for the remaining candidates.

        ``share`` is the letter's fraction of all letter occurrences in
        ``self.words``, so rarer letters map to larger values.  The result
        is empty when there are no letters to count.
        """
        freq = {}
        for w in self.words:
            for c in w:
                freq[c] = freq.get(c, 0) + 1
        n_letters = sum(freq.values())

        return {c: -math.log(k / n_letters) for c, k in freq.items()}

    def sort_by_score(self):
        """Return ``(score, word)`` pairs in ascending score order.

        A word's score is the sum of ``letter_logfreq`` values of its
        distinct letters; every slot taken by a repeated letter is charged
        the largest value instead, so words made of common, distinct letters
        come first.  Returns an empty list when no candidates remain.
        """
        logfreq = self.letter_logfreq()
        # default= keeps an exhausted candidate list from raising in max().
        max_loss = max(logfreq.values(), default=0.0)

        pairs = []
        for w in self.words:
            distinct = set(w)
            score = 0.0
            for c in distinct:
                score += logfreq[c]
            repeated_slots = self.length - len(distinct)
            pairs.append((score + repeated_slots * max_loss, w))
        pairs.sort()
        return pairs

    def filter_by_res(self, inputs: str, res: str):
        """Keep only the candidates consistent with the feedback for a guess.

        Args:
            inputs: The guessed word.
            res: One character per guessed letter: ``'0'`` not matched
                (gray), ``'1'`` right letter in the wrong place (yellow),
                ``'2'`` right letter in the right place (green).  Any
                character other than ``'0'`` and ``'2'`` is handled like
                ``'1'``.

        ``inputs`` and ``res`` are paired with ``zip``, so surplus characters
        of the longer one are ignored.  Letters are compared literally (no
        regular expressions), so punctuation in a guess cannot raise or act
        as a wildcard.  ``self.words`` is replaced by the filtered list and
        a summary of the applied constraints is printed.
        """
        pairs = list(zip(inputs, res))

        # include right positioned chars
        pattern = ''.join(c if r == '2' else '.' for c, r in pairs)
        print(f'to include: {pattern}')
        greens = [(idx, c) for idx, (c, r) in enumerate(pairs) if r == '2']

        # exclude wrong positioned chars (gray and yellow alike: the letter
        # is known not to sit at that position)
        not_here = [(idx, c) for idx, (c, r) in enumerate(pairs) if r != '2']
        for idx, c in not_here:
            pattern = '.' * idx + c + '.' * (self.length - idx - 1)
            print(f'to exclude: {pattern}')

        # Per guessed letter: how many copies were coloured (a lower bound on
        # its count in the answer) and whether any copy came back gray (then
        # the answer holds exactly the coloured number of copies).
        min_count = {}
        capped = set()
        for c, r in pairs:
            min_count.setdefault(c, 0)
            if r == '0':
                capped.add(c)
            else:
                min_count[c] += 1

        # exclude unincluded chars: letters whose every copy was gray
        banned = [c for c, k in min_count.items() if k == 0]
        print(f'exclude chars: {banned}')

        def consistent(w):
            # A green beyond the end of the word can never be satisfied.
            for idx, c in greens:
                if idx >= len(w) or w[idx] != c:
                    return False
            for idx, c in not_here:
                if idx < len(w) and w[idx] == c:
                    return False
            for c, need in min_count.items():
                have = w.count(c)
                if have < need:
                    return False
                if c in capped and have != need:
                    return False
            return True

        self.words = [w for w in self.words if consistent(w)]

if __name__ == '__main__':
    # Interactive loop: print ranked suggestions, read "<answer> <result>",
    # prune the candidates, repeat.  The optional first command-line
    # argument selects the word length (default 5).
    wordlen = 5
    if len(sys.argv) >= 2:
        wordlen = int(sys.argv[1])
    print(f'Using word length: {wordlen}')
    s = WordleSolve(wordlen)

    while True:
        if not s.words:
            # Nothing left to rank; stop instead of failing on an empty list.
            print('No candidate words left.')
            break

        print('--> Suggestions: ')
        for score, word in s.sort_by_score()[:20]:
            print(f'  {word}: {score:.3f}')

        inputs = None
        while inputs is None:
            sys.stdout.write('> ')
            sys.stdout.flush()
            line = sys.stdin.readline()
            if not line:
                # readline() returns '' at EOF and keeps doing so; without
                # this check the prompt would loop forever.
                break
            fields = line.split()
            if len(fields) != 2 or len(fields[0]) != len(fields[1]):
                print('Error: input format should be "[your answer] [result]"')
                continue
            if len(fields[0]) != wordlen:
                print(f'Error: answer must have {wordlen} letters')
                continue
            if not set(fields[1]) <= set('012'):
                print('Error: result may only contain the digits 0, 1 and 2')
                continue
            inputs = fields

        if inputs is None:
            # Input stream closed: finish the prompt line and exit.
            print()
            break
        s.filter_by_res(inputs[0], inputs[1])
	#!/usr/bin/env python3

	### Wordle word suggestion

	import math
	import re
	import os
	import sys
	import pprint
	import requests

	class WordleSolve:
	    """Rank and prune candidate words for a Wordle-style game.

	    ``self.words`` holds the dictionary words of the requested length that
	    are still possible; ``sort_by_score`` ranks them and ``filter_by_res``
	    removes the ones contradicted by the feedback for a guess.
	    """

	    # Local word-list file; fetched on first use when it does not exist.
	    worddict = './words.txt'

	    def __init__(self, length):
	        """Load the word list and keep the words that are ``length`` long.

	        When ``worddict`` is missing it is downloaded first.

	        Args:
	            length: Number of letters a candidate word must have.

	        Raises:
	            requests.HTTPError: If the download answers with an error status.
	        """
	        if not os.path.exists(self.worddict):
	            print(f'[!] dict not found at {self.worddict}.. downloading..')
	            # Fetch and validate BEFORE creating the file: otherwise a failed
	            # request leaves an empty/bogus words.txt behind that every later
	            # run would silently accept as the dictionary.
	            resp = requests.get(
	                'https://github.com/dwyl/english-words/raw/master/words_alpha.txt',
	                timeout=60,
	            )
	            resp.raise_for_status()
	            with open(self.worddict, 'wb') as f:
	                f.write(resp.content)
	            print('[+] download done')

	        with open(self.worddict) as f:
	            words = [line.strip() for line in f]
	        self.words = [w for w in words if len(w) == length]
	        self.length = length

	    def letter_logfreq(self):
	        """Return ``{letter: -log(share)}`` for the remaining candidates.

	        ``share`` is the letter's fraction of all letter occurrences in
	        ``self.words``, so rarer letters map to larger values.  The result
	        is empty when there are no letters to count.
	        """
	        freq = {}
	        for w in self.words:
	            for c in w:
	                freq[c] = freq.get(c, 0) + 1
	        n_letters = sum(freq.values())

	        return {c: -math.log(k / n_letters) for c, k in freq.items()}

	    def sort_by_score(self):
	        """Return ``(score, word)`` pairs in ascending score order.

	        A word's score is the sum of ``letter_logfreq`` values of its
	        distinct letters; every slot taken by a repeated letter is charged
	        the largest value instead, so words made of common, distinct letters
	        come first.  Returns an empty list when no candidates remain.
	        """
	        logfreq = self.letter_logfreq()
	        # default= keeps an exhausted candidate list from raising in max().
	        max_loss = max(logfreq.values(), default=0.0)

	        pairs = []
	        for w in self.words:
	            distinct = set(w)
	            score = 0.0
	            for c in distinct:
	                score += logfreq[c]
	            repeated_slots = self.length - len(distinct)
	            pairs.append((score + repeated_slots * max_loss, w))
	        pairs.sort()
	        return pairs

	    def filter_by_res(self, inputs: str, res: str):
	        """Keep only the candidates consistent with the feedback for a guess.

	        Args:
	            inputs: The guessed word.
	            res: One character per guessed letter: ``'0'`` not matched
	                (gray), ``'1'`` right letter in the wrong place (yellow),
	                ``'2'`` right letter in the right place (green).  Any
	                character other than ``'0'`` and ``'2'`` is handled like
	                ``'1'``.

	        ``inputs`` and ``res`` are paired with ``zip``, so surplus characters
	        of the longer one are ignored.  Letters are compared literally (no
	        regular expressions), so punctuation in a guess cannot raise or act
	        as a wildcard.  ``self.words`` is replaced by the filtered list and
	        a summary of the applied constraints is printed.
	        """
	        pairs = list(zip(inputs, res))

	        # include right positioned chars
	        pattern = ''.join(c if r == '2' else '.' for c, r in pairs)
	        print(f'to include: {pattern}')
	        greens = [(idx, c) for idx, (c, r) in enumerate(pairs) if r == '2']

	        # exclude wrong positioned chars (gray and yellow alike: the letter
	        # is known not to sit at that position)
	        not_here = [(idx, c) for idx, (c, r) in enumerate(pairs) if r != '2']
	        for idx, c in not_here:
	            pattern = '.' * idx + c + '.' * (self.length - idx - 1)
	            print(f'to exclude: {pattern}')

	        # Per guessed letter: how many copies were coloured (a lower bound on
	        # its count in the answer) and whether any copy came back gray (then
	        # the answer holds exactly the coloured number of copies).
	        min_count = {}
	        capped = set()
	        for c, r in pairs:
	            min_count.setdefault(c, 0)
	            if r == '0':
	                capped.add(c)
	            else:
	                min_count[c] += 1

	        # exclude unincluded chars: letters whose every copy was gray
	        banned = [c for c, k in min_count.items() if k == 0]
	        print(f'exclude chars: {banned}')

	        def consistent(w):
	            # A green beyond the end of the word can never be satisfied.
	            for idx, c in greens:
	                if idx >= len(w) or w[idx] != c:
	                    return False
	            for idx, c in not_here:
	                if idx < len(w) and w[idx] == c:
	                    return False
	            for c, need in min_count.items():
	                have = w.count(c)
	                if have < need:
	                    return False
	                if c in capped and have != need:
	                    return False
	            return True

	        self.words = [w for w in self.words if consistent(w)]

	if __name__ == '__main__':
	    # Interactive loop: print ranked suggestions, read "<answer> <result>",
	    # prune the candidates, repeat.  The optional first command-line
	    # argument selects the word length (default 5).
	    wordlen = 5
	    if len(sys.argv) >= 2:
	        wordlen = int(sys.argv[1])
	    print(f'Using word length: {wordlen}')
	    s = WordleSolve(wordlen)

	    while True:
	        if not s.words:
	            # Nothing left to rank; stop instead of failing on an empty list.
	            print('No candidate words left.')
	            break

	        print('--> Suggestions: ')
	        for score, word in s.sort_by_score()[:20]:
	            print(f' {word}: {score:.3f}')

	        inputs = None
	        while inputs is None:
	            sys.stdout.write('> ')
	            sys.stdout.flush()
	            line = sys.stdin.readline()
	            if not line:
	                # readline() returns '' at EOF and keeps doing so; without
	                # this check the prompt would loop forever.
	                break
	            fields = line.split()
	            if len(fields) != 2 or len(fields[0]) != len(fields[1]):
	                print('Error: input format should be "[your answer] [result]"')
	                continue
	            if len(fields[0]) != wordlen:
	                print(f'Error: answer must have {wordlen} letters')
	                continue
	            if not set(fields[1]) <= set('012'):
	                print('Error: result may only contain the digits 0, 1 and 2')
	                continue
	            inputs = fields

	        if inputs is None:
	            # Input stream closed: finish the prompt line and exit.
	            print()
	            break
	        s.filter_by_res(inputs[0], inputs[1])