Skip to content

Instantly share code, notes, and snippets.

@ivelasq
Last active January 3, 2021 15:24
Show Gist options
  • Save ivelasq/68bc67c446aae0fe738fd0d723529e42 to your computer and use it in GitHub Desktop.
Save ivelasq/68bc67c446aae0fe738fd0d723529e42 to your computer and use it in GitHub Desktop.
Age Distribution of the Top 100 Super Smash Bros. Melee Players (2019)
##########################################################
# Get Ages of Top 100 Super Smash Brothers Melee Players #
##########################################################
# Tutorial ----------------------------------------------------------------
# https://www.r-bloggers.com/2020/05/intro-to-polite-web-scraping-of-soccer-data-with-r/
# Library -----------------------------------------------------------------
library(tidyverse)
library(rvest)
library(httr)
library(polite)
library(lubridate)
library(ggdark)
library(showtext)
# Fonts --------------------------------------------------------------------
font_add_google("Archivo Black", "archivo+black")
font_add_google("Rozha One", "rozha+one")
# Create Data -------------------------------------------------------------
url <- "https://liquipedia.net/smash/SSBMRank"
url_bow <- polite::bow(url)
ssbm_html <-
polite::scrape(url_bow) %>%
rvest::html_nodes("table.wikitable") %>%
rvest::html_table(fill = TRUE)
ssbm_tab <-
ssbm_html %>%
.[[12]] %>%
janitor::row_to_names(1) %>%
janitor::clean_names() %>%
mutate(urls = paste0("https://liquipedia.net/smash/", player))
# Scrape Website ----------------------------------------------------------
age_get <- function(session) {
polite::scrape(session) %>%
html_nodes("[itemprop = 'birthDate']") %>%
html_text()
}
name_get <- function(session) {
polite::scrape(session) %>%
html_nodes("[itemprop = 'name']") %>%
html_text()
}
ages_get <- function(link, player) {
player <- rlang::enquo(player)
## `bow()` for every URL link
session <- bow(link)
## scrape different stats
player_age <- age_get(session = session)
player_name <- name_get(session = session)
## combine stats into a data frame
results <- list(player_name, player_age)
col_names <- c("name", "age")
player_info <-
results %>%
reduce(cbind) %>%
as_tibble() %>%
set_names(col_names) %>%
mutate(player = !!player)
return(player_info)
}
safe_ages_get <- safely(ages_get)
ssbm_ages_all <-
map2(.x = ssbm_tab$urls,
.y = ssbm_tab$player,
~ safe_ages_get(link = .x,
player = .y)) %>%
map("result") %>%
bind_rows()
# Calculate Ages ----------------------------------------------------------
ssbm_ages_pull <-
ssbm_ages_all %>%
mutate(dob = case_when(player == "Ka-Master" ~ as_date("1991-04-11"), # from https://www.ssbwiki.com/Smasher:Ka-Master
TRUE ~ as_date(str_extract(age, "(?<=\\().*?(?=\\))"))),
years_old = (unclass(today()) - unclass(dob)) / 365.25)
mean(ssbm_ages_pull$years_old)
median(ssbm_ages_pull$years_old)
# Create Plot -------------------------------------------------------------
showtext_auto()
quartz()
ssbm_ages_pull %>%
ggplot(aes(x = years_old)) +
geom_histogram(binwidth = 2,
color = "black",
fill = "#fc4843") +
dark_theme_minimal(base_family = "archivo+black") +
annotate(geom = "text",
x = 28.5,
y = 12.1,
size = 3,
family = "archivo+black",
label = "Average: 27.9",
hjust = "left",
lineheight = 0.9,
color = "white") +
geom_vline(xintercept = mean(ssbm_ages_pull$years_old),
linetype="dotted") +
scale_x_continuous(breaks = seq(19, 36, by = 2)) +
scale_y_continuous(limits = c(0, 13),
breaks = seq(0, 13, by = 2)) +
labs(title = "Age Distribution of the\nTop 100 Super Smash Bros. Melee Players (2019)",
x = "Age",
y = "Count of Players",
caption = "Source: Liquipedia") +
theme(plot.title = element_text(color = "white",
family = "rozha+one",
size = 16,
hjust = 0.5),
plot.caption = element_text(color = "white",
family = "archivo+black",
size = 8))
ggsave(here::here("ssbm.png"), dpi = "retina", width = 6, height = 4, units = "in")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment