Skip to content

Instantly share code, notes, and snippets.

@arcaravaggi
Last active April 17, 2020 09:21
Show Gist options
  • Save arcaravaggi/7026b017a3909df2da3b09d67cdb8e59 to your computer and use it in GitHub Desktop.
Save arcaravaggi/7026b017a3909df2da3b09d67cdb8e59 to your computer and use it in GitHub Desktop.
Function to create network data objects
# There's probably a much neater way to do this but if it works, it works. And this? It works.
# Example input: 40 rows pairing four usernames (A-D) with hashtag ids 1-10.
# confNet() reads the 'username' and 'hashtag' columns of its argument, so the
# example data has to use exactly those column names.
set.seed(24)
df <- data.frame(
  username = rep(LETTERS[1:4], 10),
  hashtag = rep(1:10, 4)
)
# Function to create network data objects ####
# - a data frame of usernames that interact with >1 hashtag ('dataframe')
# - edges and nodes used in creating networks ('edges', 'nodes')
# - the network object ('routes')
# from a data frame of username/hashtag records
#
#
# See stepwise code, below, for a walk-through
#
# l = data frame with 'username' and 'hashtag' columns
#
# Build hashtag co-occurrence network objects from username/hashtag records.
#
# Arguments:
#   l  A data frame with a 'username' column and a 'hashtag' column.
#
# Returns a named list with four elements:
#   dataframe  every 2-combination of hashtags (columns 'from', 'to') for each
#              username that has more than one row, with the username in '.id'
#   edges      one row per distinct (from, to) pair of node ids, with 'weight'
#   nodes      data frame of hashtag labels ('label') and integer ids ('id')
#   routes     directed tbl_graph built from 'nodes' and 'edges', with its
#              edges sorted by decreasing weight
#
# Stops with an error if 'l' lacks the required columns or if no username has
# more than one row (no hashtag pair can be formed in that case).
#
# The plyr, dplyr, scales and tidygraph packages must be installed; they are
# called through `::`, so they do not have to be attached.
confNet <- function(l) {
  stopifnot(
    is.data.frame(l),
    all(c("username", "hashtag") %in% names(l))
  )

  # All 2-combinations of one user's hashtags, one pair per row.
  pair_hashtags <- function(tags) {
    pairs <- utils::combn(tags, 2)
    data.frame(from = pairs[1, ], to = pairs[2, ])
  }

  # Only usernames with at least two rows can contribute a pair.
  by_user <- split(l, l$username)
  multi_user <- by_user[vapply(by_user, nrow, integer(1)) > 1L]
  if (length(multi_user) == 0L) {
    stop(
      "no username has more than one row, so no hashtag pairs can be formed",
      call. = FALSE
    )
  }

  pair_list <- lapply(multi_user, function(x) pair_hashtags(x$hashtag))
  d2 <- plyr::ldply(pair_list, data.frame)

  # Drop self-pairs and incomplete pairs; keep only the two hashtag columns.
  keep <- !is.na(d2$from) & !is.na(d2$to) & d2$from != d2$to
  edges <- d2[keep, c("from", "to")]
  names(edges) <- c("source", "destination")

  # Count how often each (source, destination) pair occurs.
  # NOTE(review): summarise() removes only the last grouping level, so the
  # rescaling below runs within each 'source' group rather than over all
  # edges. Kept as in the original; confirm this is the intended weighting.
  edges <- dplyr::group_by(edges, source, destination)
  edges <- dplyr::summarise(edges, n = dplyr::n())
  edges <- dplyr::mutate(edges, n = scales::rescale(n, to = c(0, 1)))
  edges <- dplyr::rename(edges, weight = n)
  edges <- dplyr::ungroup(edges)

  # Nodes must cover both ends of every pair: a hashtag that only ever occurs
  # as a destination would otherwise get an NA id in the join below. Labels
  # from 'from' come first, so their ids match the previous numbering.
  labels <- unique(c(d2$from, d2$to))
  nodes <- data.frame(label = labels, id = seq_along(labels))

  # Replace hashtag labels with node ids.
  edges <- dplyr::left_join(edges, nodes, by = c("source" = "label"))
  edges <- dplyr::rename(edges, from = id)
  edges <- dplyr::left_join(edges, nodes, by = c("destination" = "label"))
  edges <- dplyr::rename(edges, to = id)
  edges <- dplyr::select(edges, from, to, weight)

  routes <- tidygraph::tbl_graph(nodes = nodes, edges = edges, directed = TRUE)

  # Sort the graph's edges by weight and keep the result (it was previously
  # computed and thrown away), then hand the graph back with its nodes active,
  # as a freshly built tbl_graph would be.
  routes <- tidygraph::activate(routes, edges)
  routes <- dplyr::arrange(routes, dplyr::desc(weight))
  routes <- tidygraph::activate(routes, nodes)

  list(dataframe = d2, edges = edges, nodes = nodes, routes = routes)
}
# Build the network objects for the example data; the result is a list with
# elements 'dataframe', 'edges', 'nodes' and 'routes'.
netlist <- confNet(l = df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment