Last active
December 26, 2022 13:19
-
-
Save Ryo-N7/f230d30cc6d1d9d30173ea285b945cb3 to your computer and use it in GitHub Desktop.
Player minutes chart Liverpool FC, 2010-2011 (Hodgson vs. Dalglish)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## pacman pkg to load/install libraries from cran | |
## polite is a github only pkg though. | |
pacman::p_load(tidyverse, polite, scales, ggimage, ggforce, | |
rvest, glue, extrafont, ggrepel, magick) | |
loadfonts() | |
## Squad details for 2010-2011 season | |
url <- "https://www.transfermarkt.com/liverpool-fc/leistungsdaten/verein/31/reldata/GB1%262010/plus/1" | |
session <- bow(url) | |
squad_table_raw <- scrape(session) %>% | |
html_nodes(".hauptlink > div > span > a") %>% | |
html_attr("href") | |
squad_table_clean <- squad_table_raw %>% | |
enframe() %>% | |
select(-name) %>% | |
distinct() %>% | |
separate(value, into = c("1", "2", '3', '4', '5'), sep = "\\/") %>% | |
select(player_name = 2, id_num = 5) | |
## add links | |
squad_table_df <- squad_table_clean %>% | |
mutate(link = glue::glue("https://www.transfermarkt.com/{player_name}/leistungsdatendetails/spieler/{id_num}/saison/2010/verein/31/liga/0/wettbewerb/GB1/pos/0/trainer_id/0/plus/1")) %>% | |
## remove Amoo, tom ince, Dalla Valle | |
slice(-31, -38, -41) | |
glimpse(squad_table_df) | |
## base dates | |
## use someone like Skrtel who played/in squad of every single game: | |
base_url <- "https://www.transfermarkt.com/martin-skrtel/leistungsdatendetails/spieler/24180/saison/2010/verein/31/liga/0/wettbewerb/GB1/pos/0/trainer_id/0/plus/1" | |
session <- bow(base_url) | |
base_raw <- scrape(session) %>% | |
html_nodes("div.responsive-table:nth-child(3) > table:nth-child(1)") %>% | |
html_table(fill = TRUE) %>% | |
.[[1]] %>% | |
janitor::clean_names() %>% | |
slice(-n()) | |
base_dates <- base_raw %>% | |
select(date, home = home_team_2, away = away_team_2, | |
result, goal = x, assist = x_2, | |
sub_in = x_7, sub_out = x_8, minutes = x_9) %>% | |
## make sure minutes == 0 for BASE | |
mutate(date = lubridate::mdy(date), | |
minutes = 0) %>% | |
## set sub_in, sub_out = 0 | |
mutate(sub_in = 0, | |
sub_out = 0) %>% | |
## set goals/assists = 0 | |
mutate(goal = 0, | |
assist = 0) %>% | |
## separate result | |
separate(result, into = c("home_goal", "away_goal"), | |
sep = ":", convert = TRUE) %>% | |
## home - away and rank | |
mutate(home_rank = home %>% str_extract("[0-9]+") %>% as.numeric, | |
away_rank = away %>% str_extract("[0-9]+") %>% as.numeric, | |
home = home %>% str_remove_all("\\(.*\\)"), | |
away = away %>% str_remove_all("\\(.*\\)")) | |
saveRDS(base_dates, file = here::here("data/base_LFC_10_11_dates_df.RDS")) | |
base_dates <- readRDS(file = here::here("data/base_LFC_10_11_dates_df.RDS")) | |
## get_appearances() function | |
get_appearances <- function(link) { | |
session <- bow(link) | |
appearances_raw <- scrape(session) %>% | |
html_nodes("div.responsive-table:nth-child(3) > table:nth-child(1)") %>% | |
html_table(fill = TRUE) %>% | |
.[[1]] %>% | |
#magrittr::extract2(1) %>% | |
janitor::clean_names() %>% | |
slice(-n()) | |
appearances_clean <- appearances_raw %>% | |
select(date, home = home_team_2, away = away_team_2, | |
result, goal = x, assist = x_2, | |
sub_in = x_7, sub_out = x_8, minutes = x_9) %>% | |
## fix minutes | |
## mutate_at(), mutate_if()... | |
mutate(date = lubridate::mdy(date), | |
minutes = | |
if_else(str_detect(minutes, "'"), | |
str_replace_all(minutes, "'", ""), minutes), | |
minutes = if_else(str_detect(minutes, "^[0-9]+$"), | |
minutes, "0") %>% as.numeric()) %>% | |
## fix sub_in, sub_out | |
mutate(sub_in = | |
if_else(str_detect(sub_in, "'"), | |
str_replace_all(sub_in, "'", ""), sub_in), | |
sub_in = if_else(str_detect(sub_in, "^[0-9]+$"), | |
sub_in, "0") %>% as.numeric(), | |
sub_out = | |
if_else(str_detect(sub_out, "'"), | |
str_replace_all(sub_out, "'", ""), sub_out), | |
sub_out = if_else(str_detect(sub_out, "^[0-9]+$"), | |
sub_out, "0") %>% as.numeric()) %>% | |
## fix goals/assists | |
mutate(goal = if_else(str_detect(goal, "^[0-9]+$"), | |
goal, "0") %>% as.numeric(), | |
assist = if_else(str_detect(assist, "^[0-9]+$"), | |
assist, "0") %>% as.numeric()) %>% | |
## separate result | |
separate(result, into = c("home_goal", "away_goal"), | |
sep = ":", convert = TRUE) %>% | |
## home - away and rank | |
mutate(home_rank = home %>% str_extract("[0-9]+") %>% as.numeric, | |
away_rank = away %>% str_extract("[0-9]+") %>% as.numeric, | |
home = home %>% str_remove_all("\\(.*\\)"), | |
away = away %>% str_remove_all("\\(.*\\)")) | |
## deal with no match rows: | |
## basically using base df, anti_join on dates and | |
## insert info for rows where missing | |
add_df <- base_dates %>% | |
anti_join(appearances_clean, by = c("date")) | |
## combine missing data with existing | |
appearances_clean <- appearances_clean %>% | |
full_join(add_df) %>% | |
arrange(date) | |
} | |
## iterate over each player | |
appearances_df_raw <- map2(.x = squad_table_df$link, | |
.y = squad_table_df$player_name, | |
~ get_appearances(link = .x) %>% | |
mutate(name = .y)) | |
saveRDS(appearances_df_raw, | |
file = glue("{here::here()}/data/appearances_df_raw_LFC_10_11.RDS")) | |
appearances_df_raw <- readRDS( | |
file = glue("{here::here()}/data/appearances_df_raw_LFC_10_11.RDS")) | |
appearances_df_LFC_10_11 <- appearances_df_raw %>% | |
reduce(bind_rows) %>% | |
group_by(name) %>% | |
mutate(match_num = row_number()) %>% | |
mutate(end = seq(from = 90, to = 3420, by = 90), | |
start = lag(end, default = 0), | |
dur = if_else(minutes == 90, start, end - minutes)) %>% | |
## for sub-outs | |
mutate(end = case_when( | |
sub_out != 0 ~ start + sub_out, | |
TRUE ~ end), | |
dur = case_when( | |
sub_out != 0 ~ start, | |
TRUE ~ dur)) %>% | |
ungroup() %>% | |
## fix Joe Cole manually, didn't consider expulsion due to red cards | |
## there's probably a few others but I'll do that another time... | |
mutate(end = case_when( | |
name == "joe-cole" & match_num == 1 ~ dur, | |
TRUE ~ end), | |
dur = case_when( | |
name == "joe-cole" & match_num == 1 ~ 0, | |
TRUE ~ dur | |
)) %>% | |
## fix names and label positions | |
mutate(name = str_replace_all(name, "-", " ") %>% str_to_title(), | |
position = case_when(row_number() %in% 1:190 ~ "GK", | |
row_number() %in% 191:760 ~ "DF", | |
row_number() %in% 761:1102 ~ "MF", | |
row_number() %in% 1103:1482 ~ "ST"), | |
position = as_factor(position) %>% | |
fct_relevel("GK", "DF", "MF", "ST"), | |
name = as_factor(name)) %>% | |
arrange(position, name) | |
## save | |
saveRDS(appearances_df_LFC_10_11, | |
file = glue("{here::here()}/data/appearances_df_LFC_10_11.RDS")) | |
appearances_df_LFC_10_11 <- readRDS( | |
file = glue("{here::here()}/data/appearances_df_LFC_10_11.RDS")) | |
## check that each player has data for 38 games... | |
appearances_df_LFC_10_11 %>% | |
group_by(name) %>% | |
summarize(num = n()) | |
## plot | |
## dataframes for vertical + horizontal divider lines | |
divide_lines <- tibble(yint = seq(0.5, 39.5, by = 1)) | |
verticolo <- tibble(verts_start = seq(0, 3420, by = 90), | |
verts_end = seq(0, 3420, by = 90), | |
y_low = 0.5, | |
y_high = 39.5) | |
appearances_df_LFC_10_11 %>% | |
ggplot(aes(x = dur, xend = end, | |
y = name, | |
yend = name)) + | |
## Woy | |
geom_segment(data = appearances_df_LFC_10_11 %>% filter(match_num < 21), | |
aes(group = match_num), | |
size = 3, color = "darkred") + | |
## King Kenny | |
geom_segment(data = appearances_df_LFC_10_11 %>% filter(match_num >= 21), | |
aes(group = match_num), | |
size = 3, color = "darkgreen") + | |
geom_segment(data = verticolo, | |
aes(x = verts_start, xend = verts_end, | |
y = y_low, yend = y_high)) + | |
## Dividers | |
geom_segment(x = 1800, xend = 1800, | |
y = 0.5, yend = 39.5, | |
color = "darkgrey", size = 1.1) + | |
geom_hline(data = divide_lines, aes(yintercept = yint), | |
size = 0.5) + | |
scale_x_continuous(breaks = seq(45, 3420, 90), | |
labels = seq(1, 38, 1), | |
expand = c(0, 0)) + | |
expand_limits(y = c(0.1, 43)) + | |
## Woy | |
annotate(geom = "segment", | |
x = 945, xend = 945, | |
y = 40, yend = 40.5, | |
color = "black", size = 1) + | |
annotate(geom = "label", | |
label = "Roy Hodgson (W: 7 D: 4 L: 9)", | |
x = 945, y = 41, family = "Roboto Condensed") + | |
annotate(geom = "segment", | |
x = 7, xend = 1790, | |
y = 40, yend = 40, | |
color = "black", size = 1) + | |
annotate(geom = "segment", | |
x = 7, xend = 7, | |
y = 39.7, yend = 40.3, | |
color = "black", size = 1) + | |
annotate(geom = "segment", | |
x = 1790, xend = 1790, | |
y = 39.7, yend = 40.3, | |
color = "black", size = 1) + | |
## King Kenny | |
annotate(geom = "segment", | |
x = 2655, xend = 2655, | |
y = 40, yend = 40.5, | |
color = "black", size = 1) + | |
annotate(geom = "label", | |
label = "Kenny Dalglish (W: 10 D: 3 L: 5)", | |
x = 2655, y = 41, family = "Roboto Condensed") + | |
annotate(geom = "segment", | |
x = 1810, xend = 3410, | |
y = 40, yend = 40, | |
color = "black", size = 1) + | |
annotate(geom = "segment", | |
x = 1810, xend = 1810, | |
y = 39.7, yend = 40.3, | |
color = "black", size = 1) + | |
annotate(geom = "segment", | |
x = 3410, xend = 3410, | |
y = 39.7, yend = 40.3, | |
color = "black", size = 1) + | |
labs(title = "Player Minutes | Liverpool FC | Season 2010-2011", | |
subtitle = "Players Ordered by Position (ST, MF, DF, GK)", | |
x = "Minutes Played per Game Week", y = "", | |
caption = glue::glue(" | |
Data: transfermarkt.com | |
By: @R_by_Ryo")) + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
axis.title = element_text(size = 14), | |
axis.text.x = element_text(color = "black", size = 11), | |
axis.text.y = element_text(color = "black", size = 10), | |
panel.grid = element_blank(), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 16), | |
plot.caption = element_text(size = 12)) | |
ggsave(filename = here::here("Premier League 2018-2019/output/player_minutes_LFC_10_11.png"), | |
height = 9, width = 12) | |
# add logo with Magick using Thomas Mock's custom function | |
# check out the explanation in his blog post: https://themockup.netlify.com/posts/2019-01-09-add-a-logo-to-your-plot/ | |
add_logo <- function(plot_path, logo_path, logo_position, logo_scale = 10){ | |
# Requires magick R Package https://github.com/ropensci/magick | |
# Useful error message for logo position | |
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) { | |
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'") | |
} | |
# read in raw images | |
plot <- magick::image_read(plot_path) | |
logo_raw <- magick::image_read(logo_path) | |
# get dimensions of plot for scaling | |
plot_height <- magick::image_info(plot)$height | |
plot_width <- magick::image_info(plot)$width | |
# default scale to 1/10th width of plot | |
# Can change with logo_scale | |
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale)) | |
# Get width of logo | |
logo_width <- magick::image_info(logo)$width | |
logo_height <- magick::image_info(logo)$height | |
# Set position of logo | |
# Position starts at 0,0 at top left | |
# Using 0.01 for 1% - aesthetic padding | |
if (logo_position == "top right") { | |
x_pos = plot_width - logo_width - 0.01 * plot_width | |
y_pos = 0.01 * plot_height | |
} else if (logo_position == "top left") { | |
x_pos = 0.01 * plot_width | |
y_pos = 0.01 * plot_height | |
} else if (logo_position == "bottom right") { | |
x_pos = plot_width - logo_width - 0.01 * plot_width | |
y_pos = plot_height - logo_height - 0.01 * plot_height | |
} else if (logo_position == "bottom left") { | |
x_pos = 0.01 * plot_width | |
y_pos = plot_height - logo_height - 0.01 * plot_height | |
} | |
# Compose the actual overlay | |
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos)) | |
} | |
# add_logo and save | |
plot_logo <- add_logo(plot_path = here::here("Premier League 2018-2019/output/player_minutes_LFC_10_11.png"), | |
logo_path = "https://upload.wikimedia.org/wikipedia/en/thumb/0/0c/Liverpool_FC.svg/800px-Liverpool_FC.svg.png", | |
logo_position = "top right", | |
logo_scale = 18) | |
image_write(image = plot_logo, path = here::here("Premier League 2018-2019/output/player_minutes_LFC_10_11_logo.png")) |
Author
Ryo-N7
commented
Jun 25, 2019
•
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment