Last active
January 3, 2021 15:24
-
-
Save ivelasq/68bc67c446aae0fe738fd0d723529e42 to your computer and use it in GitHub Desktop.
Age Distribution of the Top 100 Super Smash Bros. Melee Players (2019)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
########################################################## | |
# Get Ages of Top 100 Super Smash Brothers Melee Players # | |
########################################################## | |
# Tutorial ---------------------------------------------------------------- | |
# https://www.r-bloggers.com/2020/05/intro-to-polite-web-scraping-of-soccer-data-with-r/ | |
# Library ----------------------------------------------------------------- | |
library(tidyverse) | |
library(rvest) | |
library(httr) | |
library(polite) | |
library(lubridate) | |
library(ggdark) | |
library(showtext) | |
# Fonts -------------------------------------------------------------------- | |
font_add_google("Archivo Black", "archivo+black") | |
font_add_google("Rozha One", "rozha+one") | |
# Create Data ------------------------------------------------------------- | |
url <- "https://liquipedia.net/smash/SSBMRank" | |
url_bow <- polite::bow(url) | |
ssbm_html <- | |
polite::scrape(url_bow) %>% | |
rvest::html_nodes("table.wikitable") %>% | |
rvest::html_table(fill = TRUE) | |
ssbm_tab <- | |
ssbm_html %>% | |
.[[12]] %>% | |
janitor::row_to_names(1) %>% | |
janitor::clean_names() %>% | |
mutate(urls = paste0("https://liquipedia.net/smash/", player)) | |
# Scrape Website ---------------------------------------------------------- | |
age_get <- function(session) { | |
polite::scrape(session) %>% | |
html_nodes("[itemprop = 'birthDate']") %>% | |
html_text() | |
} | |
name_get <- function(session) { | |
polite::scrape(session) %>% | |
html_nodes("[itemprop = 'name']") %>% | |
html_text() | |
} | |
ages_get <- function(link, player) { | |
player <- rlang::enquo(player) | |
## `bow()` for every URL link | |
session <- bow(link) | |
## scrape different stats | |
player_age <- age_get(session = session) | |
player_name <- name_get(session = session) | |
## combine stats into a data frame | |
results <- list(player_name, player_age) | |
col_names <- c("name", "age") | |
player_info <- | |
results %>% | |
reduce(cbind) %>% | |
as_tibble() %>% | |
set_names(col_names) %>% | |
mutate(player = !!player) | |
return(player_info) | |
} | |
safe_ages_get <- safely(ages_get) | |
ssbm_ages_all <- | |
map2(.x = ssbm_tab$urls, | |
.y = ssbm_tab$player, | |
~ safe_ages_get(link = .x, | |
player = .y)) %>% | |
map("result") %>% | |
bind_rows() | |
# Calculate Ages ---------------------------------------------------------- | |
ssbm_ages_pull <- | |
ssbm_ages_all %>% | |
mutate(dob = case_when(player == "Ka-Master" ~ as_date("1991-04-11"), # from https://www.ssbwiki.com/Smasher:Ka-Master | |
TRUE ~ as_date(str_extract(age, "(?<=\\().*?(?=\\))"))), | |
years_old = (unclass(today()) - unclass(dob)) / 365.25) | |
mean(ssbm_ages_pull$years_old) | |
median(ssbm_ages_pull$years_old) | |
# Create Plot ------------------------------------------------------------- | |
showtext_auto() | |
quartz() | |
ssbm_ages_pull %>% | |
ggplot(aes(x = years_old)) + | |
geom_histogram(binwidth = 2, | |
color = "black", | |
fill = "#fc4843") + | |
dark_theme_minimal(base_family = "archivo+black") + | |
annotate(geom = "text", | |
x = 28.5, | |
y = 12.1, | |
size = 3, | |
family = "archivo+black", | |
label = "Average: 27.9", | |
hjust = "left", | |
lineheight = 0.9, | |
color = "white") + | |
geom_vline(xintercept = mean(ssbm_ages_pull$years_old), | |
linetype="dotted") + | |
scale_x_continuous(breaks = seq(19, 36, by = 2)) + | |
scale_y_continuous(limits = c(0, 13), | |
breaks = seq(0, 13, by = 2)) + | |
labs(title = "Age Distribution of the\nTop 100 Super Smash Bros. Melee Players (2019)", | |
x = "Age", | |
y = "Count of Players", | |
caption = "Source: Liquipedia") + | |
theme(plot.title = element_text(color = "white", | |
family = "rozha+one", | |
size = 16, | |
hjust = 0.5), | |
plot.caption = element_text(color = "white", | |
family = "archivo+black", | |
size = 8)) | |
ggsave(here::here("ssbm.png"), dpi = "retina", width = 6, height = 4, units = "in") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment