Skip to content

Instantly share code, notes, and snippets.

@ColinFay
Created November 18, 2017 18:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ColinFay/4f793618024ff0ea9bddf17f2e9d3813 to your computer and use it in GitHub Desktop.
Save ColinFay/4f793618024ff0ea9bddf17f2e9d3813 to your computer and use it in GitHub Desktop.
Network Breizh Data Day
library(tidyverse)
library(ggraph)
library(igraph)
library(tidytext)
library(proustr)
# Load the exported tweets; text columns stay character, not factor.
bdd <- read.csv(file = "breizhdataday.csv", stringsAsFactors = FALSE)

# Stop-word table: the words returned by proust_stopwords() plus a few
# Twitter/URL tokens ("https", "rt", "d'une", "t.co").
sw <- data.frame(
  word = c(
    as.character(proust_stopwords()[["word"]]),
    "https", "rt", "d'une", "t.co"
  )
)
# Bigrams ----
# Split every tweet text into consecutive word pairs, drop the pairs that
# contain a stop word, and keep the pairs seen at least three times. The result
# is an edge table with columns `from`, `to` and `size` (the pair count).
tm_bdd <- bdd %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  # Texts with fewer than two words can yield an NA bigram. NA is not in the
  # stop-word list, so without this filter those rows would be counted
  # together and could show up as a spurious "NA" node in the graph.
  filter(!is.na(bigram)) %>%
  separate(bigram, c("from", "to"), sep = " ") %>%
  filter(!from %in% sw$word, !to %in% sw$word) %>%
  count(from, to, sort = TRUE) %>%
  rename(size = n) %>%
  filter(size >= 3)
# Build an undirected word graph from the bigram edge table and detect
# communities with edge betweenness. `FALSE` is spelled out because `F` is an
# ordinary variable that can be reassigned.
bdd_graph <- graph_from_data_frame(tm_bdd, directed = FALSE)
bdd_clust <- cluster_edge_betweenness(bdd_graph)

# Draw ----
# One label per word, coloured by the community the word was assigned to.
ggraph(bdd_graph) +
  geom_edge_link() +
  geom_node_label(
    aes(
      label = name,
      color = as.factor(bdd_clust$membership)
    ),
    show.legend = FALSE
  ) +
  scale_color_viridis_d(option = "A") +
  labs(
    title = "Bigrammes dans #BreizhDataDay",
    subtitle = "data depuis Twitter",
    caption = "@_colinfay"
  ) +
  theme_graph()
# Turn the JSON "entities" string of one tweet into mention edges.
#
# @param user Screen name of the account that wrote the tweet.
# @param vec  JSON string holding the tweet entities; its `user_mentions`
#             array is read.
# @return A data frame with columns `from` (always `user`) and `to` (the
#         mentioned screen name), one row per mention. A tweet without
#         mentions gives a zero-row data frame with the same columns, so the
#         return type is stable. Invalid JSON still raises an error.
parse_mention <- function(user, vec) {
  entities <- jsonlite::fromJSON(vec, simplifyDataFrame = TRUE)
  mentions <- entities$user_mentions

  # An empty or missing `user_mentions` array does not come back as a data
  # frame; handle it explicitly instead of failing further down.
  if (!is.data.frame(mentions) || nrow(mentions) == 0L ||
      is.null(mentions[["screen_name"]])) {
    return(data.frame(
      from = character(0),
      to = character(0),
      stringsAsFactors = FALSE
    ))
  }

  data.frame(
    from = rep(user, nrow(mentions)),
    to = mentions[["screen_name"]],
    stringsAsFactors = FALSE
  )
}
# Wrap the parser so that a tweet whose entities cannot be parsed yields a
# NULL result instead of aborting the whole run.
safe_parse <- safely(parse_mention)

# One edge table for all tweets: parse each tweet, keep the successful
# results, and stack them in a single bind_rows() call. Binding the whole list
# at once avoids re-copying the accumulated table at every step and also
# works when no tweet produced a result (reduce() would fail on an empty list).
edges_users <- map2(
  bdd$from_user, bdd$entities_str,
  ~ safe_parse(user = .x, vec = .y)
) %>%
  map("result") %>%
  compact() %>%
  bind_rows()
# Directed mention graph: the first two columns of `edges_users` are the edge
# endpoints. Communities come from edge betweenness.
bdd_graph_user <- graph_from_data_frame(d = edges_users, directed = TRUE)
bdd_clust_user <- cluster_edge_betweenness(graph = bdd_graph_user)

# Plot the mentions with small arrow heads; labels are coloured by community.
ggraph(bdd_graph_user) +
  geom_edge_link(
    arrow = grid::arrow(angle = 10, length = unit(0.1, "inches"))
  ) +
  geom_node_label(
    mapping = aes(
      label = name,
      color = as.factor(bdd_clust_user$membership)
    ),
    show.legend = FALSE
  ) +
  scale_color_viridis_d(option = "D") +
  labs(
    title = "Mentions dans #BreizhDataDay",
    subtitle = "data depuis Twitter",
    caption = "@_colinfay"
  ) +
  theme_graph()
# Turn the JSON "entities" string of one tweet into hashtag edges that link
# the event tag to every other hashtag used in the tweet.
#
# @param user Unused; kept so the function has the same signature as the
#             mention parser and can be called the same way.
# @param vec  JSON string holding the tweet entities; its `hashtags` array is
#             read.
# @return A data frame with columns `from` (always "BreizhDataDay") and `to`
#         (the other hashtag), one row per remaining hashtag. A tweet with no
#         other hashtag gives a zero-row data frame with the same columns.
#         Invalid JSON still raises an error.
parse_hashtag <- function(user, vec) {
  entities <- jsonlite::fromJSON(vec, simplifyDataFrame = TRUE)
  tags <- entities$hashtags

  # An empty or missing `hashtags` array does not come back as a data frame;
  # handle it explicitly instead of failing further down.
  if (!is.data.frame(tags) || nrow(tags) == 0L || is.null(tags[["text"]])) {
    return(data.frame(
      from = character(0),
      to = character(0),
      stringsAsFactors = FALSE
    ))
  }

  # Drop the event tag itself in any capitalisation ("BREIZHDATADAY",
  # "Breizhdataday", ...), not only the two exact spellings, so the graph gets
  # no edge from the event tag to a variant of itself.
  tag_text <- tags[["text"]]
  keep <- !is.na(tag_text) & tolower(tag_text) != "breizhdataday"
  other_tags <- tag_text[keep]

  data.frame(
    from = rep("BreizhDataDay", length(other_tags)),
    to = other_tags,
    stringsAsFactors = FALSE
  )
}
# Wrap the parser so that a tweet whose entities cannot be parsed yields a
# NULL result instead of aborting the whole run.
safe_parse <- safely(parse_hashtag)

# One edge table for all tweets: parse each tweet, keep the successful
# results, and stack them in a single bind_rows() call. Binding the whole list
# at once avoids re-copying the accumulated table at every step and also
# works when no tweet produced a result (reduce() would fail on an empty list).
edges_hash <- map2(
  bdd$from_user, bdd$entities_str,
  ~ safe_parse(user = .x, vec = .y)
) %>%
  map("result") %>%
  compact() %>%
  bind_rows()
# Hashtag graph: the first two columns of `edges_hash` are the edge endpoints
# (directed, the igraph default). Communities come from edge betweenness.
bdd_graph_hash <- graph_from_data_frame(d = edges_hash, directed = TRUE)
bdd_clust_hash <- cluster_edge_betweenness(graph = bdd_graph_hash)

# Plot the hashtags; labels are coloured by community.
ggraph(bdd_graph_hash) +
  geom_edge_link() +
  geom_node_label(
    mapping = aes(
      label = name,
      color = as.factor(bdd_clust_hash$membership)
    ),
    show.legend = FALSE
  ) +
  scale_color_viridis_d(option = "B") +
  labs(
    title = "Hashtags dans #BreizhDataDay",
    subtitle = "data depuis Twitter",
    caption = "@_colinfay"
  ) +
  theme_graph()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment