Skip to content

Instantly share code, notes, and snippets.

@lindeloev
Last active January 18, 2022 18:26
Show Gist options
  • Save lindeloev/e8358e16b84872a3c1f364ff69de2dd2 to your computer and use it in GitHub Desktop.
Save lindeloev/e8358e16b84872a3c1f364ff69de2dd2 to your computer and use it in GitHub Desktop.
Optimal guesses for Wordle
# Use these functions to make smart guesses for Wordle (https://www.powerlanguage.co.uk/wordle/)
# find_words() returns the words that satisfy the Wordle feedback. Start with `possible_words`, i.e., all 5-letter English words.
# next_word() returns words that are most likely to result in green letters.
#
# A pretty good strategy is to call next_word() with strategy "learn" for the first two words and with "guess" thereafter, picking the first commonly-known word from its output.
#############
# FUNCTIONS #
#############
#' Rank candidate words by how many green letters they are likely to yield
#'
#' Every word is scored by summing, over its letter positions, the number of
#' words in `words` that have the same letter in the same position.
#'
#' @param words A character vector of candidate words, all of the same length.
#' @param strategy One of:
#' * `"guess"`: only optimize for green letters.
#' * `"learn"`: optimize for green AND yellow letters. Each letter additionally
#'   earns `0.5 / word length` points for every time it occurs in a different
#'   position across `words`, and only words without repeated letters are
#'   returned.
#' @param n How many top hits to return.
#' @return A character vector of at most `n` words, best first. Words with
#'   equal scores keep the order in which they appear in `words`.
next_word = function(words, strategy = "guess", n = 3) {
  # Fail loudly on an unknown strategy instead of silently returning NULL.
  strategy = match.arg(strategy, c("guess", "learn"))

  # Nothing to rank.
  if (length(words) == 0) {
    return(character(0))
  }

  # Validate before building the letter matrix: rbind() would otherwise
  # recycle the letters of ragged words.
  wordlength = unique(nchar(words))
  stopifnot("All words must have the same length" = length(wordlength) == 1)

  # One row per word, one column per letter position.
  letter_matrix = do.call(rbind, strsplit(words, ""))

  # Give identification of yellow letters half the info-weight of green letters.
  yellow_weight = 0.5 / wordlength

  score = numeric(length(words))
  for (i in seq_len(wordlength)) {
    letter_i = letter_matrix[, i]

    # Green: number of words sharing this letter in this position.
    green_counts = table(letter_i)
    score = score + as.vector(green_counts[letter_i])

    # Yellow ("learn" only): how often this letter occurs in any OTHER position.
    if (strategy == "learn") {
      yellow_counts = table(letter_matrix[, -i])
      yellow_hits = as.vector(yellow_counts[letter_i])
      # A letter that never occurs elsewhere is looked up as NA; it must count
      # as zero hits rather than turning the whole word score into NA.
      yellow_hits[is.na(yellow_hits)] = 0
      score = score + yellow_hits * yellow_weight
    }
  }

  # Best-scoring words first; order() is stable, so ties keep input order.
  ranked = words[order(-score)]

  if (strategy == "learn") {
    # Repeated letters waste information, so drop such words when learning.
    no_repeats = vapply(
      strsplit(ranked, ""),
      \(x) anyDuplicated(x) == 0,
      logical(1)
    )
    ranked = ranked[no_repeats]
  }
  head(ranked, n)
}
#' Find words that fulfill Wordle criteria
#'
#' @param words Vector of possible words at this step, e.g., `c("goats", "horse")`.
#'   Words are matched against lowercase letters.
#' @param green Green characters in their correct position, e.g., `"s???e"`.
#'   Write ? where there are no green characters.
#' @param grey Gray characters, e.g., `"car"`. A grey letter that is also green
#'   is only banned from the positions not fixed by green; a grey letter that
#'   is also yellow is not used for exclusion (both happen when a guess repeats
#'   a letter).
#' @param yellows Yellow characters in the position where they were guessed
#'   (i.e., a position they are known NOT to be in), one pattern per guess,
#'   e.g., `c("???es", "??i??")`. Write ? for all other positions.
#' @return A vector with the words that satisfy all criteria.
find_words = function(words, green = "?????", grey = "", yellows = c()) {
  # Treat all feedback case-insensitively.
  green = tolower(green)
  yellows = tolower(yellows)
  grey_letters = unique(unlist(strsplit(tolower(grey), "")))
  grey_letters = grey_letters[nzchar(grey_letters)]

  # GREEN: Keep words matching green letters in their position
  regex_green = paste0("^", gsub("?", "[a-z]", green, fixed = TRUE), "$")
  words_remaining = words[grepl(regex_green, words)]

  # GREY: Remove words with grey letters
  green_letters = strsplit(green, "")[[1]]
  yellow_letters = unlist(strsplit(yellows, ""))
  for (letter in grey_letters) {
    # The letter is known to be in the word, so grey cannot rule the word out.
    if (letter %in% yellow_letters) {
      next
    }
    # Hide the positions where this letter is green, then ban it elsewhere.
    masked = words_remaining
    for (position in which(green_letters == letter)) {
      substr(masked, position, position) = "?"
    }
    words_remaining = words_remaining[!grepl(letter, masked, fixed = TRUE)]
  }

  # YELLOW
  for (yellow in yellows) {
    letters_i = strsplit(yellow, "")[[1]]
    # Iterate over positions (not letters) so that a letter occurring twice in
    # one pattern is checked at each of its positions separately.
    for (position in which(letters_i != "?")) {
      letter = letters_i[position]
      # Yellow letter must be present ...
      is_present = grepl(letter, words_remaining, fixed = TRUE)
      # ... but not in the entered position
      in_entered_position = substr(words_remaining, position, position) == letter
      words_remaining = words_remaining[is_present & !in_entered_position]
    }
  }

  words_remaining
}
############
# APPLY IT #
############
# Vector of all English words
# Download the list of all English words (one word per line) and keep the
# 5-letter ones as the starting candidates.
all_words = read.csv("https://raw.githubusercontent.com/dwyl/english-words/master/words_alpha.txt", header = FALSE, col.names = "word")$word
possible_words = all_words[nchar(all_words) == 5]

# Each section below replays one day's game: narrow the candidates with
# find_words() using the feedback so far, then ask next_word() for a guess.
# The trailing comment is the word that was actually played.

# 2022-01-01: Third guess
words = find_words(possible_words)
next_word(words, "learn") # I guess CARES
words = find_words(words, green = "????s", grey = "ca", yellows = c("??re?"))
next_word(words, "learn") # I guess TIERS
words = find_words(words, green = "????s", grey = "cati", yellows = c("??re?", "??er?"))
next_word(words, "guess", n = 100) # I guess REBUS

# 2022-01-02: Third guess
words = find_words(possible_words)
next_word(words, "learn") # I guess CARES
words = find_words(words, grey = "care", yellows = c("????s"))
next_word(words, "learn") # I guess SOILY
words = find_words(words, green = "?o???", grey = "careily", yellows = c("????s", "s????"))
next_word(words, "guess", n = 100) # I guess BOOST

# 2022-01-03: Fourth guess
words = find_words(possible_words)
next_word(words, "learn") # I guess CARES
words = find_words(words, green = "????s", grey = "cae", yellows = c("??r??"))
next_word(words, "learn") # I guess GROTS
words = find_words(words, green = "?r??s", grey = "caego", yellows = c("??r??", "???t?"))
next_word(words, "guess", n = 100) # I guess TRIMS
words = find_words(words, green = "tr??s", grey = "caegoim", yellows = c("??r??", "???t?"))
next_word(words, "guess", n = 100) # I guess TRUSS

# 2022-01-04: Third guess
words = find_words(possible_words)
next_word(words, "learn") # I guess CARES
words = find_words(words, grey = "car", yellows = c("???es"))
next_word(words, "learn") # I guess STILE
words = find_words(words, green = "s???e", grey = "cartl", yellows = c("???es", "??i??"))
next_word(words, "guess", n = 100) # I guess SIEGE

# 2022-01-05: Third guess
# (argument spelled out as `yellows`; `yellow =` only worked through partial
# argument matching)
words = find_words(possible_words)
next_word(words, "learn") # I guess CARES
words = find_words(words, green = "???e?", grey = "cas", yellows = c("??r??"))
next_word(words, "learn") # DOTER
words = find_words(words, green = "???er", grey = "casdo", yellows = c("??r??", "??t??"))
next_word(words, "guess", n = 100) # TIGER

# 2022-01-06: third guess
words = find_words(possible_words)
next_word(words, "learn") # CARES
words = find_words(words, green = "?a???", grey = "cres")
next_word(words, "learn") # MANLY
words = find_words(words, green = "?an??", grey = "cresmy", yellows = c("???l?"))
next_word(words, "guess", n = 100) # BANAL
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment