Created
September 19, 2021 13:32
-
-
Save holnburger/bb9d285b0b2b89be07bc64b56487b875 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Packages used below:
#   rtweet    - lists_members(), get_timelines()
#   tidyverse - dplyr / tidyr / stringr / tibble verbs and ggplot2 scales
#   ggraph    - ggraph(), geom_edge_link(), geom_node_point(), geom_node_label()
#   tidygraph - as_tbl_graph(), activate()
#   ggrepel   - position_nudge_repel()
library(rtweet)
library(tidyverse)
library(ggraph)
library(tidygraph)
library(ggrepel)
# Get all members of the Bundestag for each faction, using the Twitter lists
# provided by @pollytix_gmbh. Names are the party labels, values the list ids.
party_list_ids <- c(
  "AfD" = "973527951173193728",
  "CDU" = "973614171886686209",
  "CSU" = "973520347634028545",
  "B90/Die Grünen" = "973546510456705024",
  "Die Linke" = "973561044709838849",
  "FDP" = "973485249001934848",
  "SPD" = "973582861361143809"
)

# Fetch each list, tag its members with the party label, and stack the results
# in the order the lists are declared above.
mdb_list <- party_list_ids %>%
  imap(function(list_id, party_name) {
    lists_members(list_id) %>%
      mutate(party = party_name)
  }) %>%
  bind_rows()
# Named vector mapping each party label (as used in `party`) to a hex colour;
# passed to scale_color_manual() when plotting.
party_color <- c(
  "AfD" = "#1a9fde",
  "SPD" = "#e10b1f",
  "CDU" = "#565656",
  "CSU" = "#727272",
  "B90/Die Grünen" = "#499533",
  "Die Linke" = "#bc3475",
  "FDP" = "#e5d82d"
)
# Get the last 3200 tweets of every user in that list.
mdb_tweets <- get_timelines(mdb_list$user_id, n = 3200)

# Not all profiles are public, so check how many accounts returned tweets.
mdb_tweets %>%
  distinct(user_id) %>%
  nrow()
# 538 of 547, that's okay (some may not have tweeted)

# Total number of tweets collected.
mdb_tweets %>%
  nrow()
# 976 508 tweets, keep in mind: max 3200 tweets per account
# Lookup table of the German media outlets to extract from tweeted URLs.
#   media_regex  - regex fragment matching the outlet's domain
#   label_media  - display name (several domains may share one label)
#   media_short  - the literal domain; used as join key after extraction
# Commented-out rows are outlets that are currently excluded.
media_data <- tibble::tribble(
  ~media_regex,            ~label_media,                     ~media_short,
  "sz\\.de",               "Süddeutsche Zeitung",            "sz.de",
  "sueddeutsche\\.de",     "Süddeutsche Zeitung",            "sueddeutsche.de",
  "tagesschau\\.de",       "Tagesschau",                     "tagesschau.de",
  "bild\\.de",             "Bild",                           "bild.de",
  "welt\\.de",             "Welt",                           "welt.de",
  "focus\\.de",            "Focus",                          "focus.de",
  "spiegel\\.de",          "Spiegel",                        "spiegel.de",
  # "stern\\.de",          "Stern",                          "stern.de",
  # "jungewelt\\.de",      "Junge Welt",                     "jungewelt.de",
  "zeit\\.de",             "Zeit Online",                    "zeit.de",
  # "jungefreiheit\\.de",  "Junge Freiheit",                 "jungefreiheit.de",
  # "jungle\\.world",      "Jungle World",                   "jungle.world",
  "taz\\.de",              "taz",                            "taz.de",
  # "freitag\\.de",        "Der Freitag",                    "freitag.de",
  "compact-online\\.de",   "Compact Magazin",                "compact-online.de",
  # "tichyseinblick\\.de", "Tichys Einblick",                "tichyseinblick.de",
  "reitschuster\\.de",     "Reitschuster",                   "reitschuster.de",
  "faz\\.net",             "Frankfurter Allgemeine Zeitung", "faz.net",
  "handelsblatt\\.de",     "Handelsblatt",                   "handelsblatt.de",
  "nd-aktuell\\.de",       "neues deutschland",              "nd-aktuell.de",
)
# Count the media mentions per user_id.
#
# The outlet domains are combined into one alternation. Look-arounds require
# that the match is not glued to further host-name characters on either side,
# so "jungewelt.de" or "umwelt.de" are not counted as "welt.de" and
# "freizeit.de" is not counted as "zeit.de". Sub-domains such as "www.welt.de"
# or "m.bild.de" still match because "." and "/" are allowed before the domain.
# NOTE(review): the pattern is applied to the whole URL, so a domain that
# appears in the path or query string would still be counted.
media_pattern <- paste0(
  "(?<![A-Za-z0-9-])(?:",
  paste(media_data$media_regex, collapse = "|"),
  ")(?![A-Za-z0-9-])"
)

mdb_media_mention <- mdb_tweets %>%
  select(user_id, screen_name, name, urls_expanded_url) %>%
  # one row per URL contained in a tweet
  unnest(urls_expanded_url) %>%
  # the extracted text equals `media_short`, which is the join key below
  mutate(media = str_extract(urls_expanded_url, media_pattern)) %>%
  filter(!is.na(media)) %>%
  left_join(media_data, by = c("media" = "media_short")) %>%
  count(user_id, label_media)
# Pick two random members per party whose nodes get a text label in the plot.
set.seed(4465) # random seed, just for reproduction
mdb_labels <- mdb_list %>%
  group_by(party) %>%
  sample_n(2) %>%
  mutate(
    # drop everything from the first comma or from "MdB" onwards in the
    # display name, then put the party on a second line
    label_mdb = paste0(
      str_trim(str_remove(name, ",.*$|MdB.*$")),
      "\n",
      party
    )
  ) %>%
  ungroup() %>%
  select(user_id, label_mdb)
# Total number of mentions per outlet across all users, most mentioned first.
media_mention_total <- mdb_media_mention %>%
  group_by(label_media) %>%
  summarise(n = sum(n)) %>%
  arrange(desc(n)) %>%
  # media nodes are keyed by their label in the graph, so expose the label
  # under `user_id` as well; needed for joining onto the node table later
  mutate(user_id = label_media)
# Build the graph from the user/outlet mention counts and enrich its nodes
# with member metadata, the sampled labels, and the per-outlet totals.
graph_dat <- mdb_media_mention %>%
  as_tbl_graph() %>%
  activate(nodes) %>%
  rename(user_id = name) %>% # as_tbl_graph automatically names the node column `name`
  # member metadata plus the label for the randomly sampled members
  left_join(
    mdb_list %>%
      left_join(mdb_labels, by = "user_id"),
    by = "user_id"
  ) %>%
  # outlet totals; only media nodes find a match here
  left_join(media_mention_total %>% mutate(type = "media"), by = "user_id") %>%
  # nodes without an outlet label are members: use the sampled member label
  mutate(label = if_else(is.na(label_media), label_mdb, label_media)) %>%
  mutate(party = if_else(is.na(label_media), party, "media")) %>%
  mutate(n = if_else(is.na(n), 10L, n)) # used as minimal size for later
# Draw the graph: faint edges, nodes sized by `n` and coloured by `party`,
# and repelled text labels for the nodes that carry a label.
# Note: `party_color` has no "media" entry, so media nodes are not drawn in
# one of the party colours.
ggraph(graph_dat, layout = "sugiyama") +
  geom_edge_link(alpha = 0.01) +
  geom_node_point(aes(size = n, color = party), alpha = 1) +
  geom_node_label(
    aes(label = label),
    repel = TRUE,
    size = 3,
    segment.color = "#474747",
    alpha = 0.8,
    direction = "y",
    min.segment.length = 0,
    position = position_nudge_repel(y = 0.1),
    max.overlaps = 20
  ) +
  scale_color_manual(values = party_color) +
  theme_void() +
  theme(legend.position = "none")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment