Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Last active December 26, 2022 13:19
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ryo-N7/f230d30cc6d1d9d30173ea285b945cb3 to your computer and use it in GitHub Desktop.
Save Ryo-N7/f230d30cc6d1d9d30173ea285b945cb3 to your computer and use it in GitHub Desktop.
Player minutes chart Liverpool FC, 2010-2011 (Hodgson vs. Dalglish)
## pacman pkg to load/install libraries from cran
## polite is a github only pkg though.
pacman::p_load(tidyverse, polite, scales, ggimage, ggforce,
rvest, glue, extrafont, ggrepel, magick)
loadfonts()
## Squad details for 2010-2011 season
url <- "https://www.transfermarkt.com/liverpool-fc/leistungsdaten/verein/31/reldata/GB1%262010/plus/1"
session <- bow(url)
squad_table_raw <- scrape(session) %>%
html_nodes(".hauptlink > div > span > a") %>%
html_attr("href")
squad_table_clean <- squad_table_raw %>%
enframe() %>%
select(-name) %>%
distinct() %>%
separate(value, into = c("1", "2", '3', '4', '5'), sep = "\\/") %>%
select(player_name = 2, id_num = 5)
## add links
squad_table_df <- squad_table_clean %>%
mutate(link = glue::glue("https://www.transfermarkt.com/{player_name}/leistungsdatendetails/spieler/{id_num}/saison/2010/verein/31/liga/0/wettbewerb/GB1/pos/0/trainer_id/0/plus/1")) %>%
## remove Amoo, tom ince, Dalla Valle
slice(-31, -38, -41)
glimpse(squad_table_df)
## base dates
## use someone like Skrtel who played/in squad of every single game:
base_url <- "https://www.transfermarkt.com/martin-skrtel/leistungsdatendetails/spieler/24180/saison/2010/verein/31/liga/0/wettbewerb/GB1/pos/0/trainer_id/0/plus/1"
session <- bow(base_url)
base_raw <- scrape(session) %>%
html_nodes("div.responsive-table:nth-child(3) > table:nth-child(1)") %>%
html_table(fill = TRUE) %>%
.[[1]] %>%
janitor::clean_names() %>%
slice(-n())
base_dates <- base_raw %>%
select(date, home = home_team_2, away = away_team_2,
result, goal = x, assist = x_2,
sub_in = x_7, sub_out = x_8, minutes = x_9) %>%
## make sure minutes == 0 for BASE
mutate(date = lubridate::mdy(date),
minutes = 0) %>%
## set sub_in, sub_out = 0
mutate(sub_in = 0,
sub_out = 0) %>%
## set goals/assists = 0
mutate(goal = 0,
assist = 0) %>%
## separate result
separate(result, into = c("home_goal", "away_goal"),
sep = ":", convert = TRUE) %>%
## home - away and rank
mutate(home_rank = home %>% str_extract("[0-9]+") %>% as.numeric,
away_rank = away %>% str_extract("[0-9]+") %>% as.numeric,
home = home %>% str_remove_all("\\(.*\\)"),
away = away %>% str_remove_all("\\(.*\\)"))
saveRDS(base_dates, file = here::here("data/base_LFC_10_11_dates_df.RDS"))
base_dates <- readRDS(file = here::here("data/base_LFC_10_11_dates_df.RDS"))
## get_appearances() function
get_appearances <- function(link) {
session <- bow(link)
appearances_raw <- scrape(session) %>%
html_nodes("div.responsive-table:nth-child(3) > table:nth-child(1)") %>%
html_table(fill = TRUE) %>%
.[[1]] %>%
#magrittr::extract2(1) %>%
janitor::clean_names() %>%
slice(-n())
appearances_clean <- appearances_raw %>%
select(date, home = home_team_2, away = away_team_2,
result, goal = x, assist = x_2,
sub_in = x_7, sub_out = x_8, minutes = x_9) %>%
## fix minutes
## mutate_at(), mutate_if()...
mutate(date = lubridate::mdy(date),
minutes =
if_else(str_detect(minutes, "'"),
str_replace_all(minutes, "'", ""), minutes),
minutes = if_else(str_detect(minutes, "^[0-9]+$"),
minutes, "0") %>% as.numeric()) %>%
## fix sub_in, sub_out
mutate(sub_in =
if_else(str_detect(sub_in, "'"),
str_replace_all(sub_in, "'", ""), sub_in),
sub_in = if_else(str_detect(sub_in, "^[0-9]+$"),
sub_in, "0") %>% as.numeric(),
sub_out =
if_else(str_detect(sub_out, "'"),
str_replace_all(sub_out, "'", ""), sub_out),
sub_out = if_else(str_detect(sub_out, "^[0-9]+$"),
sub_out, "0") %>% as.numeric()) %>%
## fix goals/assists
mutate(goal = if_else(str_detect(goal, "^[0-9]+$"),
goal, "0") %>% as.numeric(),
assist = if_else(str_detect(assist, "^[0-9]+$"),
assist, "0") %>% as.numeric()) %>%
## separate result
separate(result, into = c("home_goal", "away_goal"),
sep = ":", convert = TRUE) %>%
## home - away and rank
mutate(home_rank = home %>% str_extract("[0-9]+") %>% as.numeric,
away_rank = away %>% str_extract("[0-9]+") %>% as.numeric,
home = home %>% str_remove_all("\\(.*\\)"),
away = away %>% str_remove_all("\\(.*\\)"))
## deal with no match rows:
## basically using base df, anti_join on dates and
## insert info for rows where missing
add_df <- base_dates %>%
anti_join(appearances_clean, by = c("date"))
## combine missing data with existing
appearances_clean <- appearances_clean %>%
full_join(add_df) %>%
arrange(date)
}
## iterate over each player
appearances_df_raw <- map2(.x = squad_table_df$link,
.y = squad_table_df$player_name,
~ get_appearances(link = .x) %>%
mutate(name = .y))
saveRDS(appearances_df_raw,
file = glue("{here::here()}/data/appearances_df_raw_LFC_10_11.RDS"))
appearances_df_raw <- readRDS(
file = glue("{here::here()}/data/appearances_df_raw_LFC_10_11.RDS"))
appearances_df_LFC_10_11 <- appearances_df_raw %>%
reduce(bind_rows) %>%
group_by(name) %>%
mutate(match_num = row_number()) %>%
mutate(end = seq(from = 90, to = 3420, by = 90),
start = lag(end, default = 0),
dur = if_else(minutes == 90, start, end - minutes)) %>%
## for sub-outs
mutate(end = case_when(
sub_out != 0 ~ start + sub_out,
TRUE ~ end),
dur = case_when(
sub_out != 0 ~ start,
TRUE ~ dur)) %>%
ungroup() %>%
## fix Joe Cole manually, didn't consider expulsion due to red cards
## there's probably a few others but I'll do that another time...
mutate(end = case_when(
name == "joe-cole" & match_num == 1 ~ dur,
TRUE ~ end),
dur = case_when(
name == "joe-cole" & match_num == 1 ~ 0,
TRUE ~ dur
)) %>%
## fix names and label positions
mutate(name = str_replace_all(name, "-", " ") %>% str_to_title(),
position = case_when(row_number() %in% 1:190 ~ "GK",
row_number() %in% 191:760 ~ "DF",
row_number() %in% 761:1102 ~ "MF",
row_number() %in% 1103:1482 ~ "ST"),
position = as_factor(position) %>%
fct_relevel("GK", "DF", "MF", "ST"),
name = as_factor(name)) %>%
arrange(position, name)
## save
saveRDS(appearances_df_LFC_10_11,
file = glue("{here::here()}/data/appearances_df_LFC_10_11.RDS"))
appearances_df_LFC_10_11 <- readRDS(
file = glue("{here::here()}/data/appearances_df_LFC_10_11.RDS"))
## check that each player has data for 38 games...
appearances_df_LFC_10_11 %>%
group_by(name) %>%
summarize(num = n())
## plot
## dataframes for vertical + horizontal divider lines
divide_lines <- tibble(yint = seq(0.5, 39.5, by = 1))
verticolo <- tibble(verts_start = seq(0, 3420, by = 90),
verts_end = seq(0, 3420, by = 90),
y_low = 0.5,
y_high = 39.5)
appearances_df_LFC_10_11 %>%
ggplot(aes(x = dur, xend = end,
y = name,
yend = name)) +
## Woy
geom_segment(data = appearances_df_LFC_10_11 %>% filter(match_num < 21),
aes(group = match_num),
size = 3, color = "darkred") +
## King Kenny
geom_segment(data = appearances_df_LFC_10_11 %>% filter(match_num >= 21),
aes(group = match_num),
size = 3, color = "darkgreen") +
geom_segment(data = verticolo,
aes(x = verts_start, xend = verts_end,
y = y_low, yend = y_high)) +
## Dividers
geom_segment(x = 1800, xend = 1800,
y = 0.5, yend = 39.5,
color = "darkgrey", size = 1.1) +
geom_hline(data = divide_lines, aes(yintercept = yint),
size = 0.5) +
scale_x_continuous(breaks = seq(45, 3420, 90),
labels = seq(1, 38, 1),
expand = c(0, 0)) +
expand_limits(y = c(0.1, 43)) +
## Woy
annotate(geom = "segment",
x = 945, xend = 945,
y = 40, yend = 40.5,
color = "black", size = 1) +
annotate(geom = "label",
label = "Roy Hodgson (W: 7 D: 4 L: 9)",
x = 945, y = 41, family = "Roboto Condensed") +
annotate(geom = "segment",
x = 7, xend = 1790,
y = 40, yend = 40,
color = "black", size = 1) +
annotate(geom = "segment",
x = 7, xend = 7,
y = 39.7, yend = 40.3,
color = "black", size = 1) +
annotate(geom = "segment",
x = 1790, xend = 1790,
y = 39.7, yend = 40.3,
color = "black", size = 1) +
## King Kenny
annotate(geom = "segment",
x = 2655, xend = 2655,
y = 40, yend = 40.5,
color = "black", size = 1) +
annotate(geom = "label",
label = "Kenny Dalglish (W: 10 D: 3 L: 5)",
x = 2655, y = 41, family = "Roboto Condensed") +
annotate(geom = "segment",
x = 1810, xend = 3410,
y = 40, yend = 40,
color = "black", size = 1) +
annotate(geom = "segment",
x = 1810, xend = 1810,
y = 39.7, yend = 40.3,
color = "black", size = 1) +
annotate(geom = "segment",
x = 3410, xend = 3410,
y = 39.7, yend = 40.3,
color = "black", size = 1) +
labs(title = "Player Minutes | Liverpool FC | Season 2010-2011",
subtitle = "Players Ordered by Position (ST, MF, DF, GK)",
x = "Minutes Played per Game Week", y = "",
caption = glue::glue("
Data: transfermarkt.com
By: @R_by_Ryo")) +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
axis.title = element_text(size = 14),
axis.text.x = element_text(color = "black", size = 11),
axis.text.y = element_text(color = "black", size = 10),
panel.grid = element_blank(),
plot.title = element_text(size = 20),
plot.subtitle = element_text(size = 16),
plot.caption = element_text(size = 12))
ggsave(filename = here::here("Premier League 2018-2019/output/player_minutes_LFC_10_11.png"),
height = 9, width = 12)
# add logo with Magick using Thomas Mock's custom function
# check out the explanation in his blog post: https://themockup.netlify.com/posts/2019-01-09-add-a-logo-to-your-plot/
add_logo <- function(plot_path, logo_path, logo_position, logo_scale = 10){
# Requires magick R Package https://github.com/ropensci/magick
# Useful error message for logo position
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) {
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'")
}
# read in raw images
plot <- magick::image_read(plot_path)
logo_raw <- magick::image_read(logo_path)
# get dimensions of plot for scaling
plot_height <- magick::image_info(plot)$height
plot_width <- magick::image_info(plot)$width
# default scale to 1/10th width of plot
# Can change with logo_scale
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale))
# Get width of logo
logo_width <- magick::image_info(logo)$width
logo_height <- magick::image_info(logo)$height
# Set position of logo
# Position starts at 0,0 at top left
# Using 0.01 for 1% - aesthetic padding
if (logo_position == "top right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "top left") {
x_pos = 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "bottom right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
} else if (logo_position == "bottom left") {
x_pos = 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
}
# Compose the actual overlay
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos))
}
# add_logo and save
plot_logo <- add_logo(plot_path = here::here("Premier League 2018-2019/output/player_minutes_LFC_10_11.png"),
logo_path = "https://upload.wikimedia.org/wikipedia/en/thumb/0/0c/Liverpool_FC.svg/800px-Liverpool_FC.svg.png",
logo_position = "top right",
logo_scale = 18)
image_write(image = plot_logo, path = here::here("Premier League 2018-2019/output/player_minutes_LFC_10_11_logo.png"))
@Ryo-N7
Copy link
Author

Ryo-N7 commented Jun 25, 2019

player_minutes_LFC_10_11_logo

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment