Skip to content

Instantly share code, notes, and snippets.

@billdenney
Created February 5, 2022 18:51
Show Gist options
  • Save billdenney/80097e1c5a0fb5f80d30dd0efa89f6d5 to your computer and use it in GitHub Desktop.
Save billdenney/80097e1c5a0fb5f80d30dd0efa89f6d5 to your computer and use it in GitHub Desktop.
Solve all words in wordle
# Load all required libraries
library(tidyverse)
# Functions ####
#' Load the possible word lists from the Wordle source
#'
#' @param url The javascript source for finding the word lists
#' @param match_possible_words A word that is within the word list for the
#' possible words (mainly, words that people are likely to know, the Wordle
#' developer was kind)
#' @param match_all_words A word that is within the word list for all words
#' (more than just the possible words)
#' @param expected_characters The number of characters expected in every word,
#' used as a quality check for the outputs
#' @return A named list with names of "possible" and "all" for the possible
#' and all word lists
#' @export
load_wordle_word_lists <- function(
url="https://www.powerlanguage.co.uk/wordle/main.c1506a22.js",
match_possible_words="balmy", match_all_words="aahed",
expected_characters=5) {
word_source_raw <- readLines(url)
## There are two word lists in the source, one that is allowed words to be the
## correct answer, and the other that is all words allowed to be guessed
two_lists <- c(possible=match_possible_words, all=match_all_words)
two_lists_l <- list()
for (current_word_type in names(two_lists)) {
current_word <- two_lists[[current_word_type]]
mask <- grepl(x=word_source_raw, pattern=current_word)
word_source_words_raw <-
gsub(
x=word_source_raw[mask],
pattern=sprintf('^.*\\[([",a-z]*%s[",a-z]*)\\].*$', current_word),
replacement="\\1"
)
#word_source_words_raw <- gsub(x=word_source_raw[mask], pattern='^.*?\\[((?:\\"[a-z]{5}\\",?)+)\\].*?$', replacement="\\1")
two_lists_l[[current_word_type]] <-
gsub(x=strsplit(x=word_source_words_raw, split=",")[[1]], pattern='"', replacement="")
}
## All words that are allowed
words <- unname(unlist(two_lists_l))
## All words that are possible
possible_words <- two_lists_l$balmy
# Make sure that loading the data worked correctly (all words should have 5 characters)
stopifnot(all(nchar(words) == expected_characters))
words[nchar(words) != expected_characters]
two_lists_l
}
#' Generate a data.frame with all the letter/position for all words
#'
#' @param word The list of words (character vector)
#' @return A data.frame with rownames of \code{word} and the first column "word"
#' containing \code{word}. Then additional columns for each letter and
#' letter/position ("a", "a1", "a2", ...) with a boolean indicating if the
#' letter is in the word or the letter is in the word at the position.
#' @examples
#' add_word_attributes(c("binge", "splut"))
#' @export
wordle_has_letter_setup <- function(word) {
stopifnot(length(unique(nchar(word))) == 1)
ret <- data.frame(word=word)
for (current_letter in letters) {
ret[[current_letter]] <- grepl(x=word, pattern=current_letter, fixed=TRUE)
for (current_idx in seq_len(nchar(word[1]))) {
ret[[paste0(current_letter, current_idx)]] <- current_letter == substr(word, current_idx, current_idx)
}
}
rownames(ret) <- word
ret
}
#' Generate the "colors" for a word guess relative to the possible words
#'
#' @param guess The word that was guessed
#' @param possible The vector of possible words
#' @return An integer vector where 0 indicates no in the word, 1 indicates
#' elsewhere in the word, and 2 indicates at the current location in the word.
#' @export
wordle_guess_colors <- function(current_guess, wordle_has_letter_df) {
states <- rep(0, nrow(wordle_has_letter_df))
for (idx in seq_len(nchar(current_guess))) {
current_letter <- substr(current_guess, idx, idx)
is_anywhere <- wordle_has_letter_df[, current_letter]
is_here <- wordle_has_letter_df[, paste0(current_letter, idx)]
states <- states*10 + (is_anywhere + is_here)
}
as.integer(states)
}
#' Generate a data.frame of "colors" where the first column are the possible
#' words to guess and the remaining columns are the colors generated by the
#' guessed word.
#'
#' @inheritParams wordle_guess_colors
wordle_guess_colors_df <- function(possible, all_words) {
wordle_has_letter_df <- wordle_has_letter_setup(possible)
all_words <- sort(unique(c(possible, all_words)))
ret <- data.frame(X=possible)
pb <- txtProgressBar(min=0, max=length(all_words), style=3)
for (current_all in all_words) {
setTxtProgressBar(pb, value=getTxtProgressBar(pb) + 1)
ret[[current_all]] <-
wordle_guess_colors(
current_guess=current_all,
wordle_has_letter_df=wordle_has_letter_df
)
}
ret
}
new_wordle_game <- function(correct_word, possible_df, expected_char=5) {
stopifnot(is.character(correct_word))
stopifnot(length(correct_word) == 1)
stopifnot(nchar(correct_word) == expected_char)
list(
correct=correct_word,
possible_df=possible_df,
available_rows=rep(TRUE, nrow(possible_df)),
available_cols=c(FALSE, rep(TRUE, ncol(possible_df) - 1))
)
}
wordle_game_state <- function(game_state, guess, verbose=TRUE) {
guess_value <- game_state$possible_df[game_state$possible_df$X == game_state$correct, guess]
history_new <- setNames(guess_value, guess)
game_state$history <- c(game_state$history, history_new)
game_state$available_rows <- game_state$available_rows & game_state$possible_df[, guess] == guess_value
# game_state$available_cols <-
# (names(game_state$available_cols) %in% game_state$possible_df$X)
if (verbose) {
message(
"Number of possible words: ", sum(game_state$available_rows), "\n",
"Number of possible guesses: ", sum(game_state$available_cols), "\n",
"History: ", paste(names(game_state$history), game_state$history, sep="=", collapse=", ")
)
}
game_state
}
wordle_choose_guess <- function(game_state, metric_fun, verbose=TRUE) {
possible_words <- game_state$possible_df[[1]][game_state$available_rows]
if (length(possible_words) < 3) {
# If there are two choices, guess
return(setNames(rep(-Inf, length(possible_words)), possible_words))
}
if (!is.list(metric_fun)) {
metric_fun <- list(metric_fun)
}
if (verbose) {
pb <- txtProgressBar(min=0, max=sum(game_state$available_cols), style=3)
pb_metric_fun <- function(...) {
setTxtProgressBar(pb, value=getTxtProgressBar(pb) + 1)
metric_fun[[1]](...)
}
} else {
pb_metric_fun <- metric_fun[[1]]
}
metrics <-
sapply(
# operate only on the available rows from the available columns
X=game_state$possible_df[game_state$available_cols][game_state$available_rows, ],
FUN=pb_metric_fun
)
# First prioritize improved metrics. Then prioritize words that are possible
# (so that you can get it in 1 more).
ret_metrics <- metrics[order(metrics, -(names(metrics) %in% possible_words))]
# If there are more than one metric functions defined, break top-level ties with the next metric function
mask_equal_to_best <- ret_metrics == ret_metrics[[1]]
if ((length(metric_fun) > 1) & sum(mask_equal_to_best) > 1) {
metrics2 <-
sapply(
# operate only on the available rows from the available columns
X=game_state$possible_df[names(ret_metrics)][game_state$available_rows, ],
FUN=metric_fun[[2]]
)
ret <- ret_metrics[mask_equal_to_best][order(metrics2)]
} else {
ret <- ret_metrics
}
ret
}
wordle_choose_guess_cache <- function(metric_fun) {
cache <- force(list())
metric_fun <- force(metric_fun)
function(game_state, verbose) {
history_chr <- paste(names(game_state$history), game_state$history, sep="=", collapse=", ")
if (history_chr %in% names(cache)) {
if (verbose) message("Using cache for ", history_chr, ": ", names(cache[[history_chr]]))
} else {
# Only store the first guess, since that is all that is used.
cache[[history_chr]] <<- wordle_choose_guess(game_state=game_state, metric_fun=metric_fun, verbose=verbose)[1]
}
cache[[history_chr]]
}
}
# Autoplay ####
wordle_autoplay <- function(game_state, guess_fun, initial_guess, verbose_guess=FALSE, verbose_state=FALSE) {
game_state <- wordle_game_state(game_state, guess=initial_guess, verbose=verbose_state)
while (game_state$history[length(game_state$history)] != 22222) {
current_guess <- guess_fun(game_state=game_state, verbose=verbose_guess)
game_state <- wordle_game_state(game_state, guess=names(current_guess)[1], verbose=verbose_state)
}
message(
"Game complete for ", game_state$correct, "; ", length(game_state$history), " guesses; ",
paste(names(game_state$history), game_state$history, sep="=", collapse=", ")
)
game_state
}
# Word selection metrics ####
wordle_metric_smallest_big_group <- function(x) {
max(summary(factor(x)))
}
wordle_metric_most_to_median_group <- function(x) {
sf <- rev(sort(summary(factor(x))))
-unname(which(cumsum(sf) > (length(x)/2))[1])
}
wordle_metric_smallest_median_group <- function(x) {
sf <- sort(summary(factor(x)))
unname(sf[which(cumsum(sf) > (length(x)/2))[1]])
}
guess_cache_smallest_big_group <- wordle_choose_guess_cache(metric_fun=wordle_metric_smallest_big_group)
guess_cache_smallest_median_group <- wordle_choose_guess_cache(metric_fun=wordle_metric_smallest_median_group)
guess_cache_smallest_big_then_median_group <- wordle_choose_guess_cache(metric_fun=list(wordle_metric_smallest_big_group, wordle_metric_smallest_median_group))
# Game Setup ####
words <- load_wordle_word_lists()
wordle_guesses_possible <- wordle_guess_colors_df(possible=words$possible, all_words=words$all)
# Start a game ####
# game_pleat <- new_wordle_game(correct_word="pleat", possible_df=wordle_guesses_possible)
# initial_guess_smallest_big_group <- wordle_choose_guess(game_state=game_pleat, metric_fun=wordle_metric_smallest_big_group)
# initial_guess_most_to_median_group <- wordle_choose_guess(game_state=game_pleat, metric_fun=wordle_metric_most_to_median_group)
# initial_guess_smallest_median_group <- wordle_choose_guess(game_state=game_pleat, metric_fun=wordle_metric_smallest_median_group)
pb_all_words <- txtProgressBar(min=0, max=nrow(wordle_guesses_possible), style=3)
ret <- list()
for (current_word in sort(wordle_guesses_possible$X)) {
setTxtProgressBar(pb_all_words, value=getTxtProgressBar(pb_all_words) + 1, title="All words")
ret[[current_word]] <-
wordle_autoplay(
game_state=new_wordle_game(correct_word=current_word, possible_df=wordle_guesses_possible),
guess_fun=guess_cache_smallest_big_group,
initial_guess="raise"
)
}
guess_distribution <- sapply(X=ret, FUN=function(x) length(x$history))
ggplot2::ggplot(data=data.frame(distr=guess_distribution), aes(x=distr)) +
geom_histogram()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment