Created
December 29, 2018 19:47
-
-
Save jlmelville/689c860534f30aa85b21e4aba265fec6 to your computer and use it in GitHub Desktop.
Find the longest word using the given vector of letters.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
words_canon <- function(words) { | |
words$canon <- apply(words, 1, | |
function(x) { | |
paste( | |
stringr::str_sort( | |
unlist( | |
strsplit( | |
tolower(as.character(x)), split = "", | |
fixed = TRUE))), | |
collapse = "") | |
} | |
) | |
words | |
} | |
words_length <- function(x) { | |
sapply(x, nchar) | |
} | |
word_find <- function(words, word) { | |
subwords <- word | |
while (TRUE) { | |
if (length(subwords) == 0) { | |
return("Sorry, got nothing") | |
} | |
new_subwords <- c() | |
len <- nchar(subwords[1]) | |
message("Searching words of length ", len) | |
lwords <- words[words$length == len, ] | |
for (subword in subwords) { | |
res <- lwords[grep(join(c("^", word_canon(subword), "$")), lwords$canon), 1] | |
if (nrow(res) > 0) { | |
return(res) | |
} | |
new_subwords <- c(new_subwords, make_subwords(subword)) | |
} | |
subwords <- unique(sort(new_subwords)) | |
} | |
} | |
word_findl <- function(word_list, word) { | |
subwords <- word_canon(word) | |
while (TRUE) { | |
if (length(subwords) == 0) { | |
return("Sorry, got nothing") | |
} | |
new_subwords <- list() | |
len <- nchar(subwords[1]) | |
message("Searching words of length ", len) | |
nsubwords <- length(subwords) | |
for (i in 1:nsubwords) { | |
subword <- subwords[[i]] | |
res <- word_list[[subword]] | |
if (length(res) > 0) { | |
return(res) | |
} | |
new_subwords[[i]] <- make_subwords(subword) | |
} | |
subwords <- unique(unlist(new_subwords)) | |
} | |
} | |
wordl_findl <- function(wordl_list, word) { | |
subwords <- word_canon(word) | |
while (TRUE) { | |
if (length(subwords) == 0) { | |
return("Sorry, got nothing") | |
} | |
new_subwords <- list() | |
len <- nchar(subwords[1]) | |
message("Searching words of length ", len) | |
wordl_sublist <- wordl_list[[as.character(len)]] | |
nsubwords <- length(subwords) | |
for (i in 1:nsubwords) { | |
subword <- subwords[[i]] | |
res <- wordl_sublist[[subword]] | |
if (length(res) > 0) { | |
return(res) | |
} | |
new_subwords[[i]] <- make_subwords(subword) | |
} | |
subwords <- unique(unlist(new_subwords)) | |
} | |
} | |
words_to_list <- function(words) { | |
res <- list() | |
for (i in 1:nrow(words)) { | |
word_row <- words[i, ] | |
word <- word_row[1] | |
anagrams <- res[[word_row$canon]] | |
if (is.null(anagrams)) { | |
anagrams <- word | |
} | |
else { | |
anagrams <- c(anagrams, word) | |
} | |
res[[word_row$canon]] <- anagrams | |
} | |
res | |
} | |
wordl_len <- function(word_list) { | |
res <- list() | |
anagrams <- names(word_list) | |
nanagrams <- length(anagrams) | |
for (i in 1:nanagrams) { | |
anagram <- anagrams[[i]] | |
len <- as.character(nchar(anagram)) | |
words <- unlist(word_list[[anagram]]) | |
names(words) <- NULL | |
lwords <- res[[len]] | |
if (is.null(lwords)) { | |
lwords <- list() | |
} | |
lwords[[anagram]] <- words | |
res[[len]] <- lwords | |
if (i %% 1000 == 0) { | |
message("Finished ", i, " / ", nanagrams) | |
} | |
} | |
res | |
} | |
chars <- function(word) { | |
strsplit(word, split = "", fixed = TRUE)[[1]] | |
} | |
join <- function(chs) { | |
paste0(chs, collapse = "") | |
} | |
joinc <- function(chs) { | |
stringi::stri_paste(chs, collapse = "") | |
} | |
word_canon <- function(word) { | |
joinc( | |
sort( | |
chars(word) | |
)) | |
} | |
make_subwords <- function(word) { | |
chs <- sort(chars(word)) | |
nchs <- length(chs) | |
res <- rep("", nchs) | |
for (i in 1:nchs) { | |
res[[i]] <- join(chs[-i]) | |
} | |
unique(res) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Sometimes a man wants to play some Letter Quest Remastered on his Switch while watching the Great British Bake Off on Netflix.
And sometimes a man just wants to pointlessly cheat at Letter Quest and has RStudio open.
I made no effort to think hard or do any meaningful research on this problem. Imagine this is like one of those Peter Norvig blog posts, except that I have no idea what I'm doing.
Apart from the obvious issue of the availability of dynamically sized vectors and dictionaries in R itself, I was surprised to find how often profiling said the slowest part was joining a vector of single characters into a string.
Stuff I should have done: