Skip to content

Instantly share code, notes, and snippets.

@Ryo-N7
Created January 14, 2020 10:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Ryo-N7/fe35f47525e12c59e52deb5c24e88503 to your computer and use it in GitHub Desktop.
Save Ryo-N7/fe35f47525e12c59e52deb5c24e88503 to your computer and use it in GitHub Desktop.
np-xG per 90 vs. xA per 90 plot (Bundesliga 2019-2020, Hinrunde)
# pkgs
```{r, message=FALSE, warning=FALSE}
pacman::p_load(tidyverse, scales, ggforce, ggtext,
rvest, glue, extrafont, ggrepel, magick)
loadfonts()
```
## add_logo
```{r}
add_logo <- function(plot_path, logo_path, logo_position,
logo_scale = 10){
# Requires magick R Package https://github.com/ropensci/magick
# Useful error message for logo position
if (!logo_position %in% c("top right", "top left", "bottom right", "bottom left")) {
stop("Error Message: Uh oh! Logo Position not recognized\n Try: logo_positon = 'top left', 'top right', 'bottom left', or 'bottom right'")
}
# read in raw images
plot <- magick::image_read(plot_path)
logo_raw <- magick::image_read(logo_path)
# get dimensions of plot for scaling
plot_height <- magick::image_info(plot)$height
plot_width <- magick::image_info(plot)$width
# default scale to 1/10th width of plot
# Can change with logo_scale
logo <- magick::image_scale(logo_raw, as.character(plot_width/logo_scale))
# Get width of logo
logo_width <- magick::image_info(logo)$width
logo_height <- magick::image_info(logo)$height
# Set position of logo
# Position starts at 0,0 at top left
# Using 0.01 for 1% - aesthetic padding
if (logo_position == "top right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "top left") {
x_pos = 0.01 * plot_width
y_pos = 0.01 * plot_height
} else if (logo_position == "bottom right") {
x_pos = plot_width - logo_width - 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
} else if (logo_position == "bottom left") {
x_pos = 0.01 * plot_width
y_pos = plot_height - logo_height - 0.01 * plot_height
}
# Compose the actual overlay
magick::image_composite(plot, logo, offset = paste0("+", x_pos, "+", y_pos))
}
```
# Bundesliga data (FBref)
- Save "Player Standard Stats" & "Squad Standard Stats" as a `.csv` file or whatever you prefer.
- https://fbref.com/en/comps/20/stats/Bundesliga-Stats
## squad stats
```{r}
buli_squad_stats_hinrunde_raw <- read_csv(here::here("data/buli_squad_stats_hinrunde.csv"),
skip = 1, col_names = TRUE)
buli_squad_stats_hinrunde <- buli_squad_stats_hinrunde_raw %>%
rename_at(vars(12:16),
~ glue::glue("{colnames(buli_squad_stats_hinrunde_raw)[12:16]}_p90")) %>%
rename_at(vars(20:24),
~ glue::glue("{colnames(buli_squad_stats_hinrunde_raw)[20:24]}_p90"))
buli_squad_stats_hinrunde <- buli_squad_stats_hinrunde %>%
rename_at(vars(2:24), ~ glue::glue("{colnames(buli_squad_stats_hinrunde)[2:24]}_squad")) %>%
rename_at(vars(contains("_1")), ~ str_replace(., "_1", ""))
glimpse(buli_squad_stats_hinrunde)
```
```{r}
## save
saveRDS(buli_squad_stats_hinrunde, file = glue("{here::here()}/data/buli_squad_stats_hinrunde.RDS"))
buli_squad_stats_hinrunde <- readRDS(file = glue("{here::here()}/data/buli_squad_stats_hinrunde.RDS"))
```
## player stats
```{r}
buli_player_stats_hinrunde_raw <- read_csv(here::here("data/buli_player_stats_hinrunde.csv"),
skip = 1, col_names = TRUE)
buli_player_stats_hinrunde <- buli_player_stats_hinrunde_raw %>%
rename_at(vars(17:21),
~ glue::glue("{colnames(buli_player_stats_hinrunde_raw)[17:21]}_p90")) %>%
rename_at(vars(25:29),
~ glue::glue("{colnames(buli_player_stats_hinrunde_raw)[25:29]}_p90"))
buli_player_stats_hinrunde <- buli_player_stats_hinrunde %>%
rename_at(vars(contains("_1")), ~ str_replace(., "_1", "")) %>%
select(-Matches, -Rk)
glimpse(buli_player_stats_hinrunde)
```
## clean
```{r}
goal_contribution_clean_df <- buli_player_stats_hinrunde %>%
left_join(buli_squad_stats_hinrunde, by = "Squad") %>%
separate(Player, into = c("fullname", "allname"), sep = "\\\\") %>%
separate(fullname, into = c("firstname", "lastname"),
sep = "\\s", extra = "merge",
remove = FALSE) %>%
mutate(fname = str_extract(firstname, "[A-Z]{1}")) %>%
## players like Fabinho listed without Tavares last name
mutate(player = if_else(is.na(lastname), firstname, lastname)) %>%
mutate(player = case_when(
!is.na(lastname) ~ glue("{fname}. {lastname}"),
TRUE ~ firstname)) %>%
group_by(fullname) %>%
mutate(goal_contrib = Gls / Gls_squad,
assist_contrib = Ast / Gls_squad) %>%
ungroup() %>%
## keep fullname so can differentiate A. Gomes (Andre/Angel), etc.
select(player, fullname, Pos, Squad, Min,
Gls, Gls_squad, PK, PK_squad,
Ast, Ast_squad,
goal_contrib, assist_contrib,
npxG_p90, xA_p90, npxG, xA)
glimpse(goal_contribution_clean_df)
```
## plot
```{r}
xG_xA_per90 <- goal_contribution_clean_df
```
```{r}
xG_xA_per90 %>%
filter(Min >= 900,
Pos %in% c("FW", "FWMF")) %>%
summarize(avg_npxg90 = median(npxG_p90),
avg_xA = median(xA_p90))
```
- Lewa: 14 > 12 np 8.3 np-xG
- Muller: 0.32 xG per 90 (Kovac), 0.4 xG per 90 (Flick)
-- 512 mins Flick 0.53 xA per 90 (Kovac), 0.58 xA per 90 (Flick)
- Gnabry:
--
- Plea: 0.48 npxG per 90, 0.28 xA per 90
- Thuram: 0.39 npxG per 90, 0.27 xA per 90
- Embolo: 0.34 npxG per 90, 0.27 xA per 90
```{r}
desc_bayern <- "Despite Niko Kovac's poor tactics & his eventual sacking, Bayern maintained great attacking output mainly through Robert Lewandowski's great finishing (12 np Goals from 8.3 np xG under Kovac)! Thomas Müller, having been frozen out by Kovac, got back to form under new manager Hansi Flick (0.32 xG per 90 vs. 0.4 xG per 90 & 0.53 xA per 90 vs. 0.58 xA per 90). Serge Gnabry has been a consistent threat throughout while Phil Coutinho has been doing well after a slow start."
desc_gladbach <- "Although he has cooled off a bit from an electric 4 goals & 4 assists in the first 7 games, Alassane Plea still provides the most threat with a team-leading 0.48 np-xG per 90 & 0.28 xA per 90 (Patrick Herrmann with 0.54 xG per 90 & 0.32 xA per 90 just misses out having only played 790 minutes). Plea is supported by the new arrivals Breel Embolo & Marcus Thuram who complete this dynamic trident that excels on transition plays."
```
```{r fig.width = 12, fig.height = 10}
xG_xA_per90_plot <- xG_xA_per90 %>%
filter(Min >= 900) %>%
ggplot(aes(xA_p90, npxG_p90)) +
geom_point(data = xG_xA_per90 %>%
filter(xA_p90 < 0.15 | npxG_p90 < 0.3,
Min >= 900),
color = "grey20", size = 4, alpha = 0.2) +
geom_point(data = xG_xA_per90 %>%
filter(xA_p90 > 0.15 | npxG_p90 > 0.3,
Min >= 900),
color = "red", size = 4) +
geom_hline(yintercept = 0.3, alpha = 0.6) +
geom_vline(xintercept = 0.15, alpha = 0.6) +
## All player labels
geom_text_repel(
data = xG_xA_per90 %>%
filter(xA_p90 > 0.15 | npxG_p90 > 0.3,
!player %in% c("R. Lewandowski",
"S. Gnabry", "T. Müller",
"P. Coutinho",
"M. Thuram",
"B. Embolo", "A. Pléa",
"K. Volland",
"F. Niederlechner",
"T. Werner"),
Min >= 900),
aes(label = player, family = "Roboto Condensed",
fontface = "bold"),
seed = 15, size = 3.5,
min.segment.length = 0, segment.color = "red",
point.padding = 0.5) +
## Separate player
geom_text(data = xG_xA_per90 %>%
filter(player %in% c("T. Werner",
"F. Niederlechner",
"K. Volland")),
aes(label = player, family = "Roboto Condensed",
fontface = "bold"),
size = 3.5, nudge_x = 0.01, hjust = 0) +
## Bayern player labels
geom_text_repel(
data = xG_xA_per90 %>%
filter(player %in% c("R. Lewandowski", "P. Coutinho",
"S. Gnabry", "T. Müller")),
aes(label = player, family = "Roboto Condensed",
fontface = "bold"),
seed = 15, size = 4.5, color = "red",
min.segment.length = 0, segment.color = "red",
point.padding = 0.5, nudge_y = 0.05) +
## Gladbach player labels
geom_text_repel(
data = xG_xA_per90 %>%
filter(player %in% c("M. Thuram",
"B. Embolo", "A. Pléa")),
aes(label = player, family = "Roboto Condensed",
fontface = "bold"),
seed = 15, size = 4.5, color = "#228B22",
min.segment.length = 0, segment.color = NA,
point.padding = 0.5) +
## Bayern description
geom_mark_hull(
aes(filter = player %in% c("R. Lewandowski",
"S. Gnabry",
"T. Müller",
"P. Coutinho"),
label = "Bayern's Attack Remains Strong.",
description = desc_bayern),
expand = unit(2.5, "mm"), con.cap = unit(0, "mm"),
label.width = unit(130, 'mm'), label.buffer = unit(5, "mm"),
label.family = "Roboto Condensed", label.fontsize = c(14, 12),
label.colour = c("red", "black")) +
## Gladbach description
geom_mark_hull(
aes(filter = player %in% c("M. Thuram",
"B. Embolo"),
label = "Gladbach's Attacking Trident",
description = desc_gladbach),
expand = unit(0.05, "mm"), con.cap = unit(0, "mm"),
label.width = unit(65, 'mm'), label.buffer = unit(49, "mm"),
label.family = "Roboto Condensed", label.fontsize = c(14, 11),
label.colour = c("#228B22", "black")) +
geom_mark_circle(
aes(filter = player == "A. Pléa"),
expand = unit(3, "mm"), label.width = unit(50, 'mm'),
label.buffer = unit(30, "mm"), label.fontsize = c(14, 11),
label.family = "Roboto Condensed",
label.colour = c("#228B22", "black")) +
## Gladbach desc.
annotate("segment",
x = 0.288, xend = 0.37,
y = 0.48, yend = 0.48) +
annotate("segment",
x = 0.37, xend = 0.37,
y = 0.48, yend = 0.348) +
## xG and xA league average
annotate("text", family = "Roboto Condensed", fontface = "bold",
x = 0.05, y = 0.94, hjust = 0, #color = "red",
label = "Average xA per 90: 0.15") +
annotate("text", family = "Roboto Condensed", fontface = "bold",
x = 0.62, y = 0.29, #color = "red",
label = "Average np-xG per 90: 0.3") +
scale_x_continuous(labels = seq(0, 0.9, 0.1),
breaks = seq(0, 0.9, 0.1),
limits = c(0, 0.65)) +
scale_y_continuous(labels = seq(0, 0.9, 0.1),
breaks = seq(0, 0.9, 0.1),
limits = c(0, 0.95)) +
labs(title = "<b style='color: #228B22'>Gladbach's Trident</b> & <b style='color: red'>Bayern's Attackers</b> Lead the League in xG per 90 & xA per 90",
subtitle = glue("
Bundesliga (2019-2020) | Hinrunde | January 14th, 2020
<p><b style='color: black'>Average (Median)</b> for Midfielders/Forwards | Minimum 900 Minutes Played"),
caption = glue("
Data: FBref | StatsBomb
Ryo Nakagawara, Twitter: @R_by_Ryo"),
x = "Expected Assists (xA) per 90",
y = "non-Penalty Expected Goals (np-xG) per 90") +
theme_minimal() +
theme(text = element_text(family = "Roboto Condensed"),
plot.title = element_markdown(size = 18),
plot.subtitle = element_markdown(size = 16),
plot.caption = element_text(size = 14),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12),
panel.grid.minor.x = element_blank())
xG_xA_per90_plot
```
#### save
```{r}
ggsave(plot = xG_xA_per90_plot,
here::here("Bundesliga 2019-2020/output/xA_xG_per90_1920_hinrunde.png"),
height = 10, width = 12)
```
```{r}
plot_logo <- add_logo(
plot_path = here::here("Bundesliga 2019-2020/output/xA_xG_per90_1920_hinrunde.png"),
logo_path = here::here("../soccer_match_reports/img/Bundesliga_logo_(2017).svg"),
logo_position = "top right",
logo_scale = 14)
plot_logo
```
```{r}
image_write(image = plot_logo,
here::here("Bundesliga 2019-2020/output/xA_xG_per90_1920_hinrunde_logo.png"))
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment