Last active
October 16, 2024 06:11
-
-
Save roblanf/5148c7d412af618f10daa9162d65c0ea to your computer and use it in GitHub Desktop.
three-letter-word problem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load necessary library | |
library(dplyr) | |
library(lexicon) | |
library(visNetwork) | |
# Step 1: Build the pool of common three-letter English words
# Load the sw_fry_1000 word list, keep only the three-character entries,
# and normalise them to lowercase in one step
data(sw_fry_1000)
three_letter_words <- tolower(sw_fry_1000[nchar(sw_fry_1000) == 3])
# Hand-picked extras; append any other words you like to this vector
three_letter_words <- c(
  three_letter_words,
  c(
    "rat", "not", "bag", "hag", "bet", "bum", "web", "its", "day", "two", "sex",
    "set", "map", "way", "off", "did", "car", "dvd", "per", "usa", "non", "why",
    "gay", "air", "hot", "say", "tax", "got", "act", "red", "key", "far", "win",
    "bed", "ass", "sea", "cut", "kit", "boy", "son", "bug", "mid", "lcd", "hiv",
    "kim", "lie", "eve", "rod", "sad", "ban", "rid", "rip", "pub", "dna", "eat",
    "gun", "sam", "tie", "ron", "rob", "gap", "sms", "cow", "lip", "sap", "dis",
    "fwd", "mud", "cop", "sir", "wet", "ram", "fox", "hop", "kid", "nfl", "sum",
    "vat", "hat", "ray", "ice", "yet", "sow", "bow", "low"
  )
)
# A second batch of words, suggested by ChatGPT
additional_words <- c(
  "and", "the", "for", "are", "but", "you", "all", "can", "was", "her", "she",
  "him", "had", "his", "out", "get", "man", "old", "now", "use", "how", "our",
  "may", "see", "big", "end", "any", "let", "put", "own", "yes", "too", "bad",
  "dog", "cat", "pig", "ant", "bee", "fly", "owl", "bat", "sun", "sky", "dry",
  "top", "pay", "buy", "ask", "dig", "fix", "mix", "row", "hit", "fit", "sit",
  "has", "nor", "mom", "sis", "fun", "joy", "box", "cup", "pen", "rug", "bus",
  "van", "cab", "gas", "oil", "add", "sum", "law", "art", "god", "pie", "egg",
  "elf", "fan", "fat", "fog", "gel", "gig", "gym", "hip", "hug", "ink", "jam",
  "jar", "jet", "jog", "jug", "lap", "log", "mad", "mat", "nap", "nod", "nut",
  "pad", "pan", "pat", "peg", "pet", "pin", "pit", "pop", "pot", "rag", "ran",
  "rap", "rig", "rim", "rot", "row", "rub", "sat", "set", "sew", "shy", "sip",
  "six", "ski", "sob", "tan", "tap", "tea", "ten", "tin", "tip", "toe", "ton",
  "top", "toy", "try", "tub", "tug", "vet", "via", "vow", "war", "wax", "wig",
  "wow", "yak", "yam", "zip", "zoo"
)
three_letter_words <- c(three_letter_words, additional_words)
# De-duplicated copy of the pool, used for membership lookups
word_set <- unique(three_letter_words)
# Step 2: For every ordered pair of words, check whether the "shifted" word
# (characters 3-5 of the six-letter concatenation, i.e. the last letter of
# Word1 followed by the first two letters of Word2) is itself in the word set.
#
# The pairs are built from the de-duplicated `word_set`, so words that were
# listed more than once do not produce repeated rows. The whole search is
# vectorised instead of growing a data frame with rbind() inside a nested loop.
n_words <- length(word_set)
# Word1 varies slowest and Word2 fastest, matching a nested
# `for (w1 ...) for (w2 ...)` traversal order
first_word <- rep(word_set, each = n_words)
second_word <- rep(word_set, times = n_words)
# paste0() has no `sep` argument; the two words are simply concatenated
shifted_word <- substr(paste0(first_word, second_word), 3, 5)
is_valid <- shifted_word %in% word_set
# Keep only the pairs whose shifted word is valid; the column names are the
# same whether or not any pair matched
results <- data.frame(
  Word1 = first_word[is_valid],
  Word2 = second_word[is_valid],
  ShiftedWords = shifted_word[is_valid],
  stringsAsFactors = FALSE
)
# Step 3: Print the table of valid pairs
print(results)
# Summarise, for each first word, the distinct words that may follow it:
#   follow_on - the followers joined into one comma-separated string
#   fN        - how many distinct followers there are
follow_on_df <- results %>%
  distinct(Word1, Word2) %>%
  group_by(Word1) %>%
  summarise(
    follow_on = paste(Word2, collapse = ", "),
    fN = n()
  )
# crazy graph | |
# Assuming follow_on_df is already created | |
# Example: | |
# follow_on_df <- data.frame( | |
# Word1 = c("cat", "dog"), | |
# follow_on = c("and, bat, mat", "cat, bat"), | |
# fN = c(3, 2), | |
# stringsAsFactors = FALSE | |
# ) | |
# Gather every word that occurs in the graph: each source word (Word1) plus
# each follow-on word parsed back out of the comma-separated `follow_on` column
unique_words <- follow_on_df$follow_on %>%
  strsplit(", ") %>%
  unlist() %>%
  c(follow_on_df$Word1, .) %>%
  unique()
library(visNetwork) | |
library(dplyr) | |
# Sample 'follow_on_df' data frame | |
# Replace this with your actual data | |
# Example: | |
# follow_on_df <- data.frame( | |
# Word1 = c("tea", "and", "cat"), | |
# follow_on = c("and, the, bag", "cat, dog", "hat, run"), | |
# stringsAsFactors = FALSE | |
# ) | |
# Define the graph nodes: one node per distinct word, labelled with the word.
# `unique_words` (computed above from follow_on_df) already holds exactly this
# set, so reuse it rather than re-splitting the follow_on column twice.
nodes <- data.frame(
  id = unique_words,
  label = unique_words,
  stringsAsFactors = FALSE
)
# Build the edge list: one directed edge from each Word1 to each of its
# follow-on words. This is done in a single vectorised step so that
# (a) no data frame is grown with rbind() inside a loop, and
# (b) an empty follow_on_df yields an empty edge table instead of the bogus
#     NA edge that iterating over 1:nrow() would produce for zero rows.
follow_on_targets <- strsplit(follow_on_df$follow_on, ", ")
# as.character() keeps the column present when there are no targets at all
# (unlist() of an empty list is NULL)
edge_to <- as.character(unlist(follow_on_targets))
edges <- data.frame(
  # Repeat each source word once per follow-on word it has
  from = rep(follow_on_df$Word1, times = lengths(follow_on_targets)),
  to = edge_to,
  # Default colour for every edge
  color = rep("grey", length(edge_to)),
  stringsAsFactors = FALSE
)
# Optionally, assign unique IDs to edges if you have multiple identical edges
# edges$id <- seq_len(nrow(edges))
# Build the interactive visNetwork graph.
# Selecting a node colours its outgoing edges green and all other edges grey;
# deselecting resets every edge to grey. Both handlers collect their colour
# changes and send them to the edge DataSet in ONE batched update() call
# rather than issuing a separate update() for every edge.
visNetwork(nodes, edges) %>%
  visIgraphLayout(layout = "layout_with_fr") %>% # Apply layout algorithm
  visEdges(
    arrows = "to",
    color = list(
      inherit = FALSE # Allow individual edge colors to be used
    )
  ) %>%
  visPhysics(enabled = TRUE) %>% # Enable physics for node movement
  visNodes( # Set node properties
    shape = "circle",
    font = list(
      color = "black",
      size = 20,
      face = "arial",
      align = "center",
      vadjust = 0
    ),
    color = list(
      background = "lightblue",
      border = "darkblue",
      highlight = "orange"
    )
  ) %>%
  visOptions(highlightNearest = TRUE, nodesIdSelection = TRUE) %>%
  visEvents(
    selectNode = "function(properties) {
      var nodeId = properties.nodes[0];
      var edgeData = this.body.data.edges;
      // Get all edges and recolour them: outgoing edges of the selected
      // node in green, everything else back to the default grey
      var allEdges = edgeData.get();
      allEdges.forEach(function(edge) {
        edge.color = (edge.from === nodeId) ? 'green' : 'grey';
      });
      // Apply every colour change in a single batched update
      edgeData.update(allEdges);
      // Focus the view on the selected node
      this.fit({
        nodes: [nodeId],
        animation: {duration: 500, easingFunction: 'easeInOutQuad'}
      });
    }",
    deselectNode = "function(properties) {
      // Reset all edges to the default color when no node is selected
      var edgeData = this.body.data.edges;
      var allEdges = edgeData.get();
      allEdges.forEach(function(edge) {
        edge.color = 'grey';
      });
      // Apply every colour change in a single batched update
      edgeData.update(allEdges);
    }"
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
added some more words and a graph layout. Thanks ChatGPT