# Calculate string distance(s) & group into networks
#### libraries ----
# install.packages("intergraph")
library(dplyr)
library(purrr)
library(furrr)
library(GGally)
library(igraph)
library(stringdist)
library(tidyr) # for expand_grid
#### environment parameters ----
# Resolve futures in background R sessions. The `multiprocess` strategy is
# deprecated in the future package; `multisession` is its documented
# replacement.
plan(multisession)
#### helper function(s) ----
#' Group the similar strings of one or two data-frame columns into networks
#'
#' Every value of column `x` is paired with every value of column `y`
#' (`expand_grid()`), each pair is scored with `stringsim()`, and the pairs
#' that pass the threshold become the edges of a graph. The connected
#' components of that graph are the groups of similar strings.
#'
#' @param df A data frame or tibble.
#' @param x Unquoted name of the column holding the first set of strings.
#' @param y Unquoted name of the column to compare against. The default,
#'   `NULL`, compares `x` with itself.
#' @param dist Lower bound a pair's score must reach to be kept. Despite the
#'   name, it is compared against the `stringsim()` score. Pairs scoring
#'   exactly 1 are always dropped.
#' @param ... Further arguments forwarded to `stringsim()`.
#' @return A list with four elements: `distances` (tibble with columns `x`,
#'   `y` and `string_dist`, the `stringsim()` score of each retained pair),
#'   `graph` (igraph graph built from the retained pairs), `networks` (list
#'   of graphs returned by decomposing `graph`) and `similar_strings` (list
#'   with the vertex names of each network).
similar_strings <- function(df, x, y = NULL, dist = 0.7, ...) {
  x <- enquo(x)
  y <- enquo(y)
  # The default `y = NULL` is captured as a quosure whose name is "NULL".
  if (quo_name(y) == "NULL") {
    y <- x
  }
  x_name <- quo_name(x)
  y_name <- quo_name(y)

  # Fail early with a clear message: `[[` would hand back NULL for a missing
  # column and the error would only surface further down.
  missing_cols <- setdiff(c(x_name, y_name), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Column(s) not found in `df`: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  pairs <- expand_grid(x = df[[x_name]], y = df[[y_name]])
  # stringsim() is vectorised over both inputs, so one call scores all pairs.
  pairs$string_dist <- stringsim(pairs$x, pairs$y, ...)
  dat <- filter(pairs, string_dist < 1, string_dist >= dist)

  graph <- graph_from_data_frame(dat[c("x", "y")])
  # Namespaced so that igraph's decompose() is used, not stats::decompose().
  networks <- igraph::decompose(graph)

  list(
    distances = dat,
    graph = graph,
    networks = networks,
    similar_strings = lapply(networks, function(net) V(net)$name)
  )
}
#### load data ----
# Load mtcars and copy its row names (the car models) into a `models` column
# of the tibble version of the data.
data(mtcars)
dat_mtcars <- mutate(as_tibble(mtcars), models = rownames(mtcars))
#### Calculate string distance(s) & group into networks ----
# Compare the car model names with each other, using a threshold of 0.6.
dat <- dat_mtcars %>%
  similar_strings(models, dist = 0.6)
# list of similar strings
dat[["similar_strings"]]
# plot full graph
ggnet2(dat[["graph"]], label = TRUE, layout.exp = 1.1)
# plot networks individually
lapply(
  dat[["networks"]],
  function(net) ggnet2(net, label = TRUE, layout.exp = 1.1)
)
# End of script.