Skip to content

Instantly share code, notes, and snippets.

@pgstevenson
Last active September 5, 2019 05:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pgstevenson/ab5619ec2f7b97816157b4a8d1e8d9e9 to your computer and use it in GitHub Desktop.
Save pgstevenson/ab5619ec2f7b97816157b4a8d1e8d9e9 to your computer and use it in GitHub Desktop.
Calculate string distance(s) & group into networks
#### libraries ----
# install.packages("intergraph")
library(dplyr)
library(purrr)
library(furrr)
library(GGally)
library(igraph)
library(stringdist)
library(tidyr) # for expand_grid
#### environment parameters ----
# Register a parallel backend for the future/furrr framework.
# The original `multiprocess` strategy is deprecated (and defunct in recent
# releases of the future package). `multisession` (background R sessions) is
# the replacement that works on every platform, including Windows and RStudio.
plan(multisession)
#### helper function(s) ----
#' Find similar strings and group them into connected networks
#'
#' Builds every pairwise combination of the values in column `x` and column
#' `y` of `df`, scores each pair with `stringdist::stringsim()`, keeps the
#' pairs that are similar but not identical, and groups the surviving strings
#' into connected components of a graph.
#'
#' @param df A data frame (or tibble) containing the column(s) to compare.
#' @param x Unquoted name of the first column.
#' @param y Unquoted name of the second column. Defaults to `NULL`, meaning
#'   `x` is compared against itself.
#' @param dist Minimum similarity a pair must reach to be kept. Despite the
#'   name this is a *similarity* threshold: pairs with
#'   `dist <= similarity < 1` are retained (a similarity of exactly 1, i.e.
#'   identical strings, is dropped).
#' @param ... Further arguments forwarded to `stringdist::stringsim()`, e.g.
#'   `method = "jw"`.
#' @return A list with elements
#'   * `distances`: tibble with columns `x`, `y` and `string_dist` (the
#'     similarity score) for every retained pair;
#'   * `graph`: igraph graph built from the retained `x`/`y` pairs;
#'   * `networks`: list of graphs, one per connected component of `graph`;
#'   * `similar_strings`: list of character vectors holding the vertex names
#'     of each component.
similar_strings <- function(df, x, y = NULL, dist = 0.7, ...) {
  x <- enquo(x)
  y <- enquo(y)
  # An unsupplied (or explicitly NULL) `y` means: compare `x` with itself.
  if (quo_name(y) == "NULL") {
    y <- x
  }

  x_name <- quo_name(x)
  y_name <- quo_name(y)

  # Fail early with a clear message; `df[[name]]` would otherwise yield NULL
  # and produce an obscure error further down.
  unknown <- setdiff(c(x_name, y_name), names(df))
  if (length(unknown) > 0) {
    stop(
      "Column(s) not found in `df`: ", paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }

  pairs <- expand_grid(x = df[[x_name]], y = df[[y_name]])
  # stringsim() is vectorised over both inputs, so a single call scores all
  # pairs at once; `...` lets the caller choose the method and its options.
  pairs$string_dist <- stringsim(pairs$x, pairs$y, ...)
  dat <- pairs %>%
    filter(string_dist < 1, string_dist >= dist)

  out <- list()
  out$distances <- dat
  out$graph <- dat %>%
    select(x, y) %>%
    graph_from_data_frame()
  # igraph::decompose() supersedes the deprecated decompose.graph() alias; the
  # namespace prefix avoids any clash with stats::decompose().
  out$networks <- igraph::decompose(out$graph)
  out$similar_strings <- lapply(out$networks, function(net) V(net)$name)
  out
}
#### load data ----
# Use the built-in mtcars data; the car names live in its row names, so they
# are copied from the original data frame into an explicit `models` column.
data(mtcars)
dat_mtcars <- mutate(as_tibble(mtcars), models = rownames(mtcars))

#### Calculate string distance(s) & group into networks ----
# Compare every model name with every other one, using 0.6 as the threshold.
dat <- similar_strings(dat_mtcars, models, dist = 0.6)

# list of similar strings
dat$similar_strings

# plot full graph
ggnet2(dat$graph, label = TRUE, layout.exp = 1.1)

# plot networks individually
lapply(
  dat$networks,
  function(network) ggnet2(network, label = TRUE, layout.exp = 1.1)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment