|
library(rvest) |
|
library(tidyverse) |
|
library(furrr) |
|
|
|
#--------functions-------- |
|
|
|
#' Extract the overview table of MEPs from the full-list page.
#'
#' @param x A parsed HTML document (as returned by `read_html()`) containing
#'   the member listing.
#' @return A tibble with one row per matched member and the columns
#'   `mep_name`, `mep_link`, `mep_group`, `mep_party`, `mep_country` and
#'   `mep_id`. `tibble()` only recycles length-one columns, so it raises an
#'   error if the selectors match differing numbers of nodes.
get_mep_overview <- function(x) {
  # Text content of every node matched by a CSS selector.
  node_text <- function(css) {
    html_text(html_nodes(x, css))
  }

  # `tibble()` replaces the deprecated `data_frame()`; like its predecessor it
  # evaluates columns in order, so `mep_id` can refer to `mep_link`.
  tibble(
    mep_name = node_text(".member-name"),
    # The hrefs are prefixed with the site root to form the profile URL.
    mep_link = x %>%
      html_nodes(".single-member-container a") %>%
      html_attr("href") %>%
      paste0("http://www.europarl.europa.eu", .),
    mep_group = node_text(".ep-layout_group .ep_name"),
    mep_party = node_text(".ep-layout_party .ep_name"),
    mep_country = node_text(".ep-layout_country .ep_name"),
    # The id is the run of digits at the very end of the profile link.
    mep_id = str_extract(mep_link, "\\d+$")
  )
}
|
|
|
#' Scrape contact and social media links from a single MEP profile page.
#'
#' @param x URL of the profile page; the trailing digits are used as `mep_id`.
#' @return A one-row tibble. `mep_id` is a character value; every other column
#'   is a list-column holding the (possibly empty) character vector of matches,
#'   so a profile without a given link still yields exactly one row.
get_mep_data <- function(x) {
  page <- read_html(x)

  # All `href` values of the nodes matched by `css`, wrapped in a list so the
  # result occupies a single tibble cell regardless of how many nodes match.
  link_hrefs <- function(css) {
    list(html_attr(html_nodes(page, css), "href"))
  }

  # `tibble()` replaces the deprecated `data_frame()`.
  tibble(
    mep_id = str_extract(x, "\\d+$"),
    # Unlike the other selectors this one reads the href from the matched node
    # itself rather than from a nested <a>.
    website = link_hrefs(".link_website"),
    email = link_hrefs(".link_email a"),
    twitter = link_hrefs(".link_twitt a"),
    facebook = link_hrefs(".link_fb a"),
    youtube = link_hrefs(".link_youtube a"),
    linkedin = link_hrefs(".link_linkedin a"),
    instagram = link_hrefs(".link_instagram a"),
    mep_birthday = list(html_text(html_nodes(page, "#birthDate")))
  )
}
|
|
|
#--------data-------- |
|
|
|
# Download the full member listing and build one overview row per MEP.
mep_page <- read_html("http://www.europarl.europa.eu/meps/en/full-list/all")

mep_df <- get_mep_overview(mep_page)

# Scrape the individual profile pages in parallel. `multisession` is used
# because the `multiprocess` strategy has been deprecated and subsequently
# made defunct in the future package.
plan(multisession)

mep_social_media <- future_map_dfr(
  mep_df$mep_link,
  get_mep_data,
  .progress = TRUE
)
|
|
|
#--------plot-------- |
|
|
|
library(emojifont)
library(ggalt)

# Load the Font Awesome font so the icon glyphs can be rendered in the plot.
load.fontawesome()

# Lookup table mapping each network to its Font Awesome glyph and brand colour.
# `fontawesome()` expects the "fa-" prefixed alias for every icon; the facebook
# row previously passed the bare name, unlike all other rows.
social_media_icons <- tibble::tribble(
  ~icon,          ~social_media, ~sm_color,
  "fa-facebook",  "facebook",    "#3b5999",
  "fa-twitter",   "twitter",     "#55acee",
  "fa-instagram", "instagram",   "#e4405f",
  "fa-linkedin",  "linkedin",    "#0077B5",
  "fa-youtube",   "youtube",     "#cd201f"
) %>%
  mutate(icon = fontawesome(icon))
|
|
|
# Count, per network, how many MEPs list at least one account.
sm_plot_data <- mep_social_media %>%
  # Replace every list-column by the number of links found in each cell.
  mutate(across(where(is.list), lengths)) %>%
  # A count of zero means "no account"; mark it missing so it can be dropped.
  mutate(across(where(is.integer), ~ na_if(.x, 0L))) %>%
  # Long format: one row per MEP and network on which an account exists.
  pivot_longer(
    c(twitter, facebook, youtube, linkedin, instagram),
    names_to = "social_media",
    values_to = "account",
    values_drop_na = TRUE
  ) %>%
  count(social_media, sort = TRUE) %>%
  # Attach the icon glyph and brand colour used by the plot.
  left_join(social_media_icons, by = "social_media")
|
|
|
# Brand colours sorted by ascending count, i.e. in the same order as the
# factor levels produced by `reorder(icon, n)` below, so that every axis icon
# is drawn in the colour of its own network.
social_media_colors <- sm_plot_data$sm_color[order(sm_plot_data$n)]

# Horizontal lollipop chart: one lollipop per network, labelled with its icon.
sm_plot_data %>%
  ggplot(aes(x = reorder(icon, n), y = n, color = sm_color)) +
  geom_lollipop() +
  # `sm_color` already holds literal colour codes, so use them as-is.
  scale_color_identity() +
  theme_void() +
  # theme_void() removes the axis text; re-enable it for the icon axis and
  # render it with the Font Awesome font.
  theme(
    axis.text.y = element_text(
      family = "fontawesome-webfont",
      size = 25,
      color = social_media_colors,
      margin = margin(0, -15, 0, 0)
    )
  ) +
  # "none" replaces the deprecated `FALSE` for suppressing a legend.
  guides(color = "none") +
  coord_flip() +
  labs(
    title = "Social Media Auftritte der Mitglieder des Europaparlaments",
    subtitle = "Angaben der 751 Abgeordneten auf der Webseite des Europaparlaments",
    caption = "holnburger.com",
    x = NULL,
    y = NULL
  ) +
  scale_y_continuous(limits = c(0, 600)) +
  # Print the count next to the head of each lollipop.
  geom_text(aes(label = n), hjust = -0.5, size = 5)