Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Liverpool FC's Age-Utility Graph
## Packages
```{r}
library(rvest)
library(polite) # devtools::install_github("dmi3kno/polite")
library(dplyr)
library(tidyr)
library(ggplot2)
library(scales)
library(purrr)
library(stringr)
library(ggrepel)
library(glue)
library(extrafont)
#loadfonts()
```
## Webscrape transfermarkt.com with rvest + polite
```{r}
# first time using "polite" package for responsible web scraping!
session <- bow("https://www.transfermarkt.com/liverpool-fc/leistungsdaten/verein/31/reldata/GB1%262017/plus/1")
print(session)
# "The path is scrapable for this user-agent": OK, looks like we are good to go!
# scraping tranfermarkt is a nightmare...
# scrape each col individually then combine later...
# grab name from photo element instead
result_name <- scrape(session) %>%
html_nodes("#yw1 .bilderrahmen-fixed") %>%
html_attr("title")
# grab age
result_age <- scrape(session) %>%
html_nodes(".posrela+ .zentriert") %>%
html_text()
# grab minutes played in league
result_mins <- scrape(session) %>%
html_nodes("td.rechts") %>%
html_text()
```
## Tidy data
- NOTE: changed y-axis to CURRENT AGE instead so it'll be more up-to-date, code below + plot code reflects these changes
```{r}
# place each vector into list
resultados <- list(result_name, result_age, result_mins)
col_name <- c("name", "age", "minutes")
# then reduce(cbind) to combine them, set names to cols
resultados %>%
reduce(cbind) %>%
as_tibble() %>%
set_names(col_name) -> results_comb
# NOICE.gif
glimpse(results_comb)
# players who have had 2 birthdays since beginning of 17/18 season
age_plus_one <- c("Lovren", "Van Dijk", "Moreno", "Ings")
# fix "strings" into proper formats, calculate % of minutes appeared
lfc_minutes <- results_comb %>%
mutate(age = as.numeric(age),
minutes = minutes %>%
str_replace("\\.", "") %>%
str_replace("'", "") %>%
as.numeric(),
min_perc = (minutes / 3420) %>% round(digits = 3)) %>%
filter(!is.na(minutes)) %>%
separate(name, into = c("first_name", "last_name"), by = " ") %>%
# manually fix some names
mutate(
last_name = case_when(
first_name == "Trent" ~ "Alexander-Arnold",
first_name == "Virgil" ~ "Van Dijk",
first_name == "Alex" ~ "Oxlade-Chamberlain",
TRUE ~ last_name),
age = age + 1) %>% # do CURRENT age instead for plot 2.0
# add an additional year to age for specific players
mutate(
age = case_when(
last_name %in% age_plus_one ~ age + 1,
TRUE ~ age)
) %>%
# can't be arsed to scrape them individually so manually add the new lads
add_row(
first_name = " ",
last_name = "Alisson",
age = 25,
minutes = 3330,
min_perc = 0.974
) %>%
add_row(
first_name = " ",
last_name = "Fabinho",
age = 24,
minutes = 3060,
min_perc = 0.895
) %>%
add_row(
first_name = "Naby",
last_name = "Keita",
age = 23,
minutes = 1966,
min_perc = 0.642
) %>%
add_row(
first_name = "Xherdan",
last_name = "Shaqiri",
age = 26,
minutes = 3049,
min_perc = 0.892
) %>%
# create identifier for new vs. old player for labelling purposes
mutate(
new_player = case_when(
last_name %in% c("Alisson", "Fabinho", "Keita", "Shaqiri") ~ TRUE,
TRUE ~ FALSE)) %>%
arrange(desc(min_perc))
# rectanglular highlight for players in their prime:
rect_df <- data.frame(
xmin = 24, xmax = 30,
ymin = -Inf, ymax = Inf
)
```
## PLOT
```{r fig.height=6, fig.width=8}
lfc_minutes %>%
ggplot(aes(x = age, y = min_perc)) +
geom_rect(
data = rect_df, inherit.aes = FALSE,
aes(xmin = xmin, xmax = xmax,
ymin = ymin, ymax = ymax),
alpha = 0.3,
fill = "firebrick1") +
geom_point(color = "red", size = 2.5) +
geom_text_repel(
data = lfc_minutes %>% filter(!new_player == TRUE),
aes(label = last_name, family = "Roboto Condensed"),
nudge_x = 0.5,
seed = 6) +
geom_text_repel(
data = lfc_minutes %>% filter(new_player == TRUE),
aes(label = last_name, family = "Roboto Condensed", fontface = "bold"),
size = 4, nudge_x = 0.5, nudge_y = 0.02,
seed = 8) +
scale_y_continuous(
expand = c(0.01, 0),
limits = c(0, 1),
labels = percent_format()) +
scale_x_continuous(
breaks = pretty_breaks(n = 10)) +
labs(
x = "Current Age (As of Aug. 5th, 2018)", y = "% of Minutes Played",
title = "Liverpool FC: Age-Utility Matrix",
subtitle = "Premier League 17/18 (Summer 2018 transfers in bold, departed players left in for comparison)",
caption = "Data from transfermarkt.com\nInspired by @FinerMargins\nBy @R_by_Ryo") +
theme_bw() +
theme(
text = element_text(family = "Roboto Condensed"),
panel.grid.minor.y = element_blank()) +
geom_label(
aes(x = 20.5, y = 0.87,
hjust = 0.5,
label = glue("
Encouraging to see Liverpool buying players both in
their prime and regulars in their previous teams.
Our entire best 'Starting XI' are going to be
in their prime this season!
"),
family = "Roboto Condensed"),
size = 3.5)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.