Skip to content

Instantly share code, notes, and snippets.

@Radcliffe
Created April 9, 2022 21:32
Show Gist options
  • Save Radcliffe/7cc34028db9fe6d1643bf0e893dca42e to your computer and use it in GitHub Desktop.
Save Radcliffe/7cc34028db9fe6d1643bf0e893dca42e to your computer and use it in GitHub Desktop.
Python script to determine the optimal starting guess for Wordle using Shannon information entropy.
# This Python script calculates the Shannon information entropy
# for each initial guess in Wordle. Words with high entropy are
# good starting guesses for Wordle.
import numpy as np
import requests
from collections import Counter
def get_data(url='https://paste.ee/d/4zigF/0', timeout=30):
    """
    Retrieve the Wordle wordlist from a pastebin site.

    The response body is treated as tab-separated text. The first line and
    the final element produced by splitting on newlines are discarded
    (presumably a header row and the empty string after a trailing
    newline -- confirm against the paste), and the word is read from the
    third column of every remaining line.

    Args:
        url: Address of the raw paste to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A tuple ``(words, targets)``: the list of all five-letter words
        which are allowed as guesses, and a shorter list of common
        five-letter words, from which the secret word of the day is
        selected. ``targets`` is the prefix of ``words`` that precedes
        the entry 'aahed'.

    Raises:
        requests.exceptions.RequestException: If the request fails, times
            out, or the server answers with an HTTP error status.
        IndexError: If a line has fewer than three tab-separated fields.
        ValueError: If the word 'aahed' is not present in the list.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of parsing an error page as data.
    response.raise_for_status()
    lines = response.text.split('\n')[1:-1]
    words = [line.split('\t')[2] for line in lines]
    # Everything before 'aahed' is taken to be the list of possible answers.
    targets = words[:words.index('aahed')]
    return words, targets
def get_hints(guess, target):
    """
    Return the hints resulting from a guess and a secret (target) word.

    The hints are encoded as a decimal number with one digit per letter
    (00000 to 22222 for five-letter words), most significant digit first:
    0 means that the letter is not in the target, 1 means that the letter
    is in the target but in the wrong position, and 2 means that the
    letter is in the target and in the correct position.

    Each target letter can justify at most one hint, so a repeated letter
    in the guess is only flagged as often as it occurs in the target.
    Comparison is case-insensitive.

    Args:
        guess: The guessed word.
        target: The secret word.

    Returns:
        The hint pattern as an int (leading zeros are therefore dropped).

    Raises:
        ValueError: If guess and target differ in length.
    """
    guess = guess.lower()
    target = target.lower()
    length = len(target)
    if len(guess) != length:
        # A longer guess used to be silently truncated and a shorter one
        # failed with a bare IndexError; reject both explicitly.
        raise ValueError("guess and target must have the same length")
    hints = 0
    # matched[j] is True once target letter j has been used for a hint.
    matched = [False] * length
    # First pass: exact matches, so they take priority over misplaced ones.
    for i in range(length):
        if guess[i] == target[i]:
            hints += 2 * 10 ** (length - i - 1)
            matched[i] = True
    # Second pass: pair each remaining guess letter with the first target
    # letter of the same value that has not been used yet.
    for i in range(length):
        if guess[i] != target[i]:
            for j in range(length):
                if guess[i] == target[j] and not matched[j]:
                    hints += 10 ** (length - i - 1)
                    matched[j] = True
                    break
    return hints
def entropy(guess, targets):
    """
    Calculate the Shannon information entropy of a guess, in bits.

    Every target is scored against the guess with get_hints, and the
    entropy is taken over the resulting distribution of hint patterns,
    with each target weighted equally.

    Args:
        guess: The word being evaluated.
        targets: Iterable of candidate secret words.

    Returns:
        The entropy in bits; 0.0 when targets is empty.
    """
    counter = Counter(get_hints(guess, target) for target in targets)
    if not counter:
        # No targets means no outcomes; skip the 0/0 normalisation below.
        return 0.0
    x = np.array(list(counter.values()))
    p = x / np.sum(x)
    return -np.sum(p * np.log2(p))
def main():
    """
    Download the word lists and print the highest-entropy opening guesses.

    Two results are printed, each as the entropy followed by the word: the
    best guess among the common (target) words, and the best guess among
    all allowed words. In both cases the entropy is measured against the
    target list. Ties on entropy go to the word that sorts last, because
    the maximum is taken over (entropy, word) tuples.
    """
    words, targets = get_data()
    print("Most informative guess - common words only:")
    print(*max((entropy(guess, targets), guess) for guess in targets))
    print("Most informative guess - all words:")
    print(*max((entropy(guess, targets), guess) for guess in words))
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment