madewokherd/guesl.py

## guesl.py

import collections
import math
import string
import sys

WORD_LENGTH = 5
VALID_ONLY = False
HARD_MODE = False
WORST_GUESS = False
DORDLE = False
USE_ENTROPY = False

#COMMON_WORDS_FILE = '/usr/share/dict/words'
#COMMON_WORDS_FILE = 'all-words'
#EXTRA_WORDS_FILE = 'all-words'

#COMMON_WORDS_FILE = 'common-words'
#EXTRA_WORDS_FILE = 'extra-words'

COMMON_WORDS_FILE = '/usr/share/dict/words'
EXTRA_WORDS_FILE = '/dev/null'

# constraint kinds:
POS_MATCH = 0 # character at index {num} of the word is this letter
POS_NOMATCH = 1 # character at index {num} of the word is not this letter, AND at least one of this letter appears in the word
LETTER_MIN = 2 # at least {num} of this letter appears in the word, num >= 2
LETTER_MAX = 3 # at most {num} of this letter appears in the word

constraint = collections.namedtuple('constraint', ['kind', 'letter', 'num'])

def all_constraints(word_length=WORD_LENGTH):
    for i in range(word_length):
        for letter in string.ascii_lowercase:
            if i:
                yield constraint(LETTER_MIN, letter, i+1)
            yield constraint(LETTER_MAX, letter, i)
            yield constraint(POS_MATCH, letter, i)
            yield constraint(POS_NOMATCH, letter, i)

def test_constraint(con, word):
    kind, letter, num = con
    if kind == LETTER_MIN:
        return word.count(letter) >= num
    if kind == LETTER_MAX:
        return word.count(letter) <= num
    if kind == POS_MATCH:
        return word[num] == letter
    if kind == POS_NOMATCH:
        return word[num] != letter and letter in word

all_words = set()

with open(COMMON_WORDS_FILE, 'r') as f:
    all_letters = frozenset(string.ascii_lowercase) # avoid words with upper-case letters as they may be names
    for line in f:
        word = line.strip()
        if len(word) == WORD_LENGTH and all_letters.issuperset(word):
            all_words.add(word.lower())

possible_words = all_words.copy()

guessable_words = all_words.copy()

with open(EXTRA_WORDS_FILE, 'r') as f:
    all_letters = frozenset(string.ascii_lowercase) # avoid words with upper-case letters as they may be names
    for line in f:
        word = line.strip()
        if len(word) == WORD_LENGTH and all_letters.issuperset(word):
            guessable_words.add(word.lower())

if DORDLE:
    possible_words2 = possible_words.copy()
    guessable_words2 = possible_words.copy()

words_by_constraint = {}

for con in all_constraints():
    kind, letter, num = con
    matching_words = set()
    for word in guessable_words if (VALID_ONLY or HARD_MODE) else possible_words:
        if test_constraint(con, word):
            matching_words.add(word)
    words_by_constraint[con] = matching_words

def constraints_from_guess(guess_word, actual_word):
    result = []
    for letter in set(guess_word):
        guessed_count = guess_word.count(letter)
        actual_count = actual_word.count(letter)
        if guessed_count > actual_count:
            # if we guessed more of this letter than appear in the actual word, we know the exact amount
            yield LETTER_MAX, letter, actual_count
            if actual_count >= 2:
                yield LETTER_MIN, letter, actual_count
        if actual_count:
            for i in range(len(guess_word)):
                if guess_word[i] == letter:
                    if actual_word[i] == letter:
                        yield POS_MATCH, letter, i
                    else:
                        yield POS_NOMATCH, letter, i
            if guessed_count <= actual_count and guessed_count >= 2:
                yield LETTER_MIN, letter, guessed_count

def get_possibility_sets(word, possible_words):
    remaining_words = possible_words.copy()
    while remaining_words:
        possibility_set = remaining_words.copy()
        actual_word = remaining_words.pop()
        constraint_set = frozenset(constraints_from_guess(word, actual_word))
        for con in constraint_set:
            possibility_set.intersection_update(words_by_constraint[con])
        assert actual_word in possibility_set
        yield possibility_set
        remaining_words.difference_update(possibility_set)

if USE_ENTROPY:
    def score_word(word, possible_words):
        entropy = 0.0

        for i in get_possibility_sets(word, possible_words):
            probability = len(i) / len(possible_words)
            entropy += probability * math.log2(probability)

        return entropy, word not in possible_words
else:
    def score_word(word, possible_words):
        return max(len(x) for x in get_possibility_sets(word, possible_words)), word not in possible_words

def add_constraint(con, possible_words, guessable_words):
    possible_words.intersection_update(words_by_constraint[con])
    if VALID_ONLY or (HARD_MODE and con.kind != LETTER_MAX):
        guessable_words.intersection_update(words_by_constraint[con])

def process_constraint_string(s):
    # string format:
    #  Gln - Letter l is in position n
    #  Yln - Letter l is in the word but not in position n
    #  Lln - Letter l appears at least n times in the word, n >= 2
    #  Uln - Letter l appears at most n times in the word (Ux0 would mean x does not appear in the word)
    chars_to_kind = {'G': POS_MATCH, 'Y': POS_NOMATCH, 'L': LETTER_MIN, 'U': LETTER_MAX}
    i = 0
    for token in s.split():
        if token == '/':
            i = 1
            continue
        kind = chars_to_kind[token[0]]
        letter = token[1].lower()
        num = int(token[2:])
        if i:
            add_constraint(constraint(kind, letter, num), possible_words2, guessable_words2)
        else:
            add_constraint(constraint(kind, letter, num), possible_words, guessable_words)

process_constraint_string(' '.join(sys.argv[1:]))

while True:
    print('Remaining words:', len(possible_words))
    if len(possible_words) <= 32:
        print(' '.join(possible_words))
    if DORDLE:
        print('Remaining words(2):', len(possible_words2))
        if len(possible_words2) <= 32:
            print(' '.join(possible_words2))
        if len(possible_words2) == 1 and not WORST_GUESS:
            # We found it!
            print('Found(2):', list(possible_words2)[0])
            DORDLE = False
    if len(possible_words) == 1 and not WORST_GUESS:
        # We found it!
        print('Found:', list(possible_words)[0])
        if DORDLE:
            DORDLE = False
            possible_words = possible_words2
            guessable_words = guessable_words2
        else:
            sys.exit()
    best_word = 'resin'
    if WORST_GUESS:
        best_word_score = 0, False
    elif DORDLE:
        best_word_score = len(possible_words) + len(possible_words2), 2
    else:
        best_word_score = len(possible_words), True # score by the minimum possible remaining words after making this guess
    for word in (guessable_words|guessable_words2) if DORDLE else guessable_words:
        score = score_word(word, possible_words)
        if DORDLE:
            score2 = score_word(word, possible_words2)
            score = score[0]+score2[0], score[1]+score2[1]
        if (score > best_word_score) if WORST_GUESS else (score < best_word_score):
            best_word = word
            best_word_score = score
        #print(word, score)
    print('Guess:', best_word)
    if USE_ENTROPY:
        print('Entropy:', -best_word_score[0], '' if best_word_score[1] else '*')
    else:
        print('Remaining words after this guess:', best_word_score[0], '' if best_word_score[1] else '*')
    process_constraint_string(input('Enter further constraints: '))

	import collections
	import math
	import string
	import sys

	WORD_LENGTH = 5
	VALID_ONLY = False
	HARD_MODE = False
	WORST_GUESS = False
	DORDLE = False
	USE_ENTROPY = False

	#COMMON_WORDS_FILE = '/usr/share/dict/words'
	#COMMON_WORDS_FILE = 'all-words'
	#EXTRA_WORDS_FILE = 'all-words'

	#COMMON_WORDS_FILE = 'common-words'
	#EXTRA_WORDS_FILE = 'extra-words'

	COMMON_WORDS_FILE = '/usr/share/dict/words'
	EXTRA_WORDS_FILE = '/dev/null'

	# constraint kinds:
	POS_MATCH = 0 # character at index {num} of the word is this letter
	POS_NOMATCH = 1 # character at index {num} of the word is not this letter, AND at least one of this letter appears in the word
	LETTER_MIN = 2 # at least {num} of this letter appears in the word, num >= 2
	LETTER_MAX = 3 # at most {num} of this letter appears in the word

	constraint = collections.namedtuple('constraint', ['kind', 'letter', 'num'])

	def all_constraints(word_length=WORD_LENGTH):
	for i in range(word_length):
	for letter in string.ascii_lowercase:
	if i:
	yield constraint(LETTER_MIN, letter, i+1)
	yield constraint(LETTER_MAX, letter, i)
	yield constraint(POS_MATCH, letter, i)
	yield constraint(POS_NOMATCH, letter, i)

	def test_constraint(con, word):
	kind, letter, num = con
	if kind == LETTER_MIN:
	return word.count(letter) >= num
	if kind == LETTER_MAX:
	return word.count(letter) <= num
	if kind == POS_MATCH:
	return word[num] == letter
	if kind == POS_NOMATCH:
	return word[num] != letter and letter in word

	all_words = set()

	with open(COMMON_WORDS_FILE, 'r') as f:
	all_letters = frozenset(string.ascii_lowercase) # avoid words with upper-case letters as they may be names
	for line in f:
	word = line.strip()
	if len(word) == WORD_LENGTH and all_letters.issuperset(word):
	all_words.add(word.lower())

	possible_words = all_words.copy()

	guessable_words = all_words.copy()

	with open(EXTRA_WORDS_FILE, 'r') as f:
	all_letters = frozenset(string.ascii_lowercase) # avoid words with upper-case letters as they may be names
	for line in f:
	word = line.strip()
	if len(word) == WORD_LENGTH and all_letters.issuperset(word):
	guessable_words.add(word.lower())

	if DORDLE:
	possible_words2 = possible_words.copy()
	guessable_words2 = possible_words.copy()

	words_by_constraint = {}

	for con in all_constraints():
	kind, letter, num = con
	matching_words = set()
	for word in guessable_words if (VALID_ONLY or HARD_MODE) else possible_words:
	if test_constraint(con, word):
	matching_words.add(word)
	words_by_constraint[con] = matching_words

	def constraints_from_guess(guess_word, actual_word):
	result = []
	for letter in set(guess_word):
	guessed_count = guess_word.count(letter)
	actual_count = actual_word.count(letter)
	if guessed_count > actual_count:
	# if we guessed more of this letter than appear in the actual word, we know the exact amount
	yield LETTER_MAX, letter, actual_count
	if actual_count >= 2:
	yield LETTER_MIN, letter, actual_count
	if actual_count:
	for i in range(len(guess_word)):
	if guess_word[i] == letter:
	if actual_word[i] == letter:
	yield POS_MATCH, letter, i
	else:
	yield POS_NOMATCH, letter, i
	if guessed_count <= actual_count and guessed_count >= 2:
	yield LETTER_MIN, letter, guessed_count

	def get_possibility_sets(word, possible_words):
	remaining_words = possible_words.copy()
	while remaining_words:
	possibility_set = remaining_words.copy()
	actual_word = remaining_words.pop()
	constraint_set = frozenset(constraints_from_guess(word, actual_word))
	for con in constraint_set:
	possibility_set.intersection_update(words_by_constraint[con])
	assert actual_word in possibility_set
	yield possibility_set
	remaining_words.difference_update(possibility_set)

	if USE_ENTROPY:
	def score_word(word, possible_words):
	entropy = 0.0

	for i in get_possibility_sets(word, possible_words):
	probability = len(i) / len(possible_words)
	entropy += probability * math.log2(probability)

	return entropy, word not in possible_words
	else:
	def score_word(word, possible_words):
	return max(len(x) for x in get_possibility_sets(word, possible_words)), word not in possible_words

	def add_constraint(con, possible_words, guessable_words):
	possible_words.intersection_update(words_by_constraint[con])
	if VALID_ONLY or (HARD_MODE and con.kind != LETTER_MAX):
	guessable_words.intersection_update(words_by_constraint[con])

	def process_constraint_string(s):
	# string format:
	# Gln - Letter l is in position n
	# Yln - Letter l is in the word but not in position n
	# Lln - Letter l appears at least n times in the word, n >= 2
	# Uln - Letter l appears at most n times in the word (Ux0 would mean x does not appear in the word)
	chars_to_kind = {'G': POS_MATCH, 'Y': POS_NOMATCH, 'L': LETTER_MIN, 'U': LETTER_MAX}
	i = 0
	for token in s.split():
	if token == '/':
	i = 1
	continue
	kind = chars_to_kind[token[0]]
	letter = token[1].lower()
	num = int(token[2:])
	if i:
	add_constraint(constraint(kind, letter, num), possible_words2, guessable_words2)
	else:
	add_constraint(constraint(kind, letter, num), possible_words, guessable_words)

	process_constraint_string(' '.join(sys.argv[1:]))

	while True:
	print('Remaining words:', len(possible_words))
	if len(possible_words) <= 32:
	print(' '.join(possible_words))
	if DORDLE:
	print('Remaining words(2):', len(possible_words2))
	if len(possible_words2) <= 32:
	print(' '.join(possible_words2))
	if len(possible_words2) == 1 and not WORST_GUESS:
	# We found it!
	print('Found(2):', list(possible_words2)[0])
	DORDLE = False
	if len(possible_words) == 1 and not WORST_GUESS:
	# We found it!
	print('Found:', list(possible_words)[0])
	if DORDLE:
	DORDLE = False
	possible_words = possible_words2
	guessable_words = guessable_words2
	else:
	sys.exit()
	best_word = 'resin'
	if WORST_GUESS:
	best_word_score = 0, False
	elif DORDLE:
	best_word_score = len(possible_words) + len(possible_words2), 2
	else:
	best_word_score = len(possible_words), True # score by the minimum possible remaining words after making this guess
	for word in (guessable_words\|guessable_words2) if DORDLE else guessable_words:
	score = score_word(word, possible_words)
	if DORDLE:
	score2 = score_word(word, possible_words2)
	score = score[0]+score2[0], score[1]+score2[1]
	if (score > best_word_score) if WORST_GUESS else (score < best_word_score):
	best_word = word
	best_word_score = score
	#print(word, score)
	print('Guess:', best_word)
	if USE_ENTROPY:
	print('Entropy:', -best_word_score[0], '' if best_word_score[1] else '*')
	else:
	print('Remaining words after this guess:', best_word_score[0], '' if best_word_score[1] else '*')
	process_constraint_string(input('Enter further constraints: '))