Skip to content

Instantly share code, notes, and snippets.

@jlmelville
Created December 29, 2018 19:47
Show Gist options
  • Save jlmelville/689c860534f30aa85b21e4aba265fec6 to your computer and use it in GitHub Desktop.
Save jlmelville/689c860534f30aa85b21e4aba265fec6 to your computer and use it in GitHub Desktop.
Find the longest word using the given vector of letters.
words_canon <- function(words) {
words$canon <- apply(words, 1,
function(x) {
paste(
stringr::str_sort(
unlist(
strsplit(
tolower(as.character(x)), split = "",
fixed = TRUE))),
collapse = "")
}
)
words
}
words_length <- function(x) {
sapply(x, nchar)
}
word_find <- function(words, word) {
subwords <- word
while (TRUE) {
if (length(subwords) == 0) {
return("Sorry, got nothing")
}
new_subwords <- c()
len <- nchar(subwords[1])
message("Searching words of length ", len)
lwords <- words[words$length == len, ]
for (subword in subwords) {
res <- lwords[grep(join(c("^", word_canon(subword), "$")), lwords$canon), 1]
if (nrow(res) > 0) {
return(res)
}
new_subwords <- c(new_subwords, make_subwords(subword))
}
subwords <- unique(sort(new_subwords))
}
}
word_findl <- function(word_list, word) {
subwords <- word_canon(word)
while (TRUE) {
if (length(subwords) == 0) {
return("Sorry, got nothing")
}
new_subwords <- list()
len <- nchar(subwords[1])
message("Searching words of length ", len)
nsubwords <- length(subwords)
for (i in 1:nsubwords) {
subword <- subwords[[i]]
res <- word_list[[subword]]
if (length(res) > 0) {
return(res)
}
new_subwords[[i]] <- make_subwords(subword)
}
subwords <- unique(unlist(new_subwords))
}
}
wordl_findl <- function(wordl_list, word) {
subwords <- word_canon(word)
while (TRUE) {
if (length(subwords) == 0) {
return("Sorry, got nothing")
}
new_subwords <- list()
len <- nchar(subwords[1])
message("Searching words of length ", len)
wordl_sublist <- wordl_list[[as.character(len)]]
nsubwords <- length(subwords)
for (i in 1:nsubwords) {
subword <- subwords[[i]]
res <- wordl_sublist[[subword]]
if (length(res) > 0) {
return(res)
}
new_subwords[[i]] <- make_subwords(subword)
}
subwords <- unique(unlist(new_subwords))
}
}
words_to_list <- function(words) {
res <- list()
for (i in 1:nrow(words)) {
word_row <- words[i, ]
word <- word_row[1]
anagrams <- res[[word_row$canon]]
if (is.null(anagrams)) {
anagrams <- word
}
else {
anagrams <- c(anagrams, word)
}
res[[word_row$canon]] <- anagrams
}
res
}
wordl_len <- function(word_list) {
res <- list()
anagrams <- names(word_list)
nanagrams <- length(anagrams)
for (i in 1:nanagrams) {
anagram <- anagrams[[i]]
len <- as.character(nchar(anagram))
words <- unlist(word_list[[anagram]])
names(words) <- NULL
lwords <- res[[len]]
if (is.null(lwords)) {
lwords <- list()
}
lwords[[anagram]] <- words
res[[len]] <- lwords
if (i %% 1000 == 0) {
message("Finished ", i, " / ", nanagrams)
}
}
res
}
chars <- function(word) {
strsplit(word, split = "", fixed = TRUE)[[1]]
}
join <- function(chs) {
paste0(chs, collapse = "")
}
joinc <- function(chs) {
stringi::stri_paste(chs, collapse = "")
}
word_canon <- function(word) {
joinc(
sort(
chars(word)
))
}
make_subwords <- function(word) {
chs <- sort(chars(word))
nchs <- length(chs)
res <- rep("", nchs)
for (i in 1:nchs) {
res[[i]] <- join(chs[-i])
}
unique(res)
}
@jlmelville
Copy link
Author

Prepping the word list:

words <- read_csv("path/to/wordlist")
wnoupp <- words[-grep("^[A-Z].*", words$X1), ]
word_list <- words_to_list(wnoupp)
wordl_list <- worldl_len(word_list)

Looking for words:

wordl_findl(wordl_list, "xnawjdarvseiiit")
Searching words of length 15
Searching words of length 14
Searching words of length 13
Searching words of length 12
Searching words of length 11
Searching words of length 10
[1] "aviatrixes"

@jlmelville
Copy link
Author

Sometimes a man wants to play some Letter Quest Remastered on his Switch while watching the Great British Bake Off on Netflix.

And sometimes a man just wants to pointlessly cheat at Letter Quest and has RStudio open.

I made no effort to think hard or do any meaningful research on this problem. Imagine this is like one of those Peter Norvig blog posts, except that I have no idea what I'm doing.

Apart from the obvious issue of the availability of dynamically sized vectors and dictionaries in R itself, I was surprised to find how often profiling said the slowest part was joining a vector of single characters into a string.

Stuff I should have done:

  • Added an extra layer to the word list based on the string prefix.
  • Taken into account the relative value of the letters.
  • Not written this in R.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment