Last active
December 1, 2019 09:56
-
-
Save Ryo-N7/622fabee1cde074c1a1d8f2698bf56df to your computer and use it in GitHub Desktop.
Visualize the EPL, Part 2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#### | |
# Will keep updating this so if everything isn't there yet please be patient! | |
#### | |
## Packages | |
pacman::p_load(tidyverse, polite, scales, ggimage, ggforce, | |
understatr, cowplot, kableExtra, ggbeeswarm, | |
jsonlite, xml2, qdapRegex, stringi, stringr, | |
rvest, glue, extrafont, ggrepel, magick, ggtext) | |
loadfonts(quiet = TRUE) | |
## Data | |
## Load in data from: soccer_ggplots/data/EPL_shots_data_df_raw.RDS | |
## Read in directly from the URL: | |
EPL_shots_data_df_raw <- readRDS(url("https://raw.githubusercontent.com/Ryo-N7/soccer_ggplots/master/data/EPL_shots_data_df_raw.RDS")) | |
## Colors | |
fillvals <- c("Liverpool" = "red", | |
"Tottenham" = "#132257", | |
"Manchester City" = "#6CABDD", | |
"Chelsea" = "blue", #"#034694", | |
"Arsenal" = "green", #"#9C824A", | |
"Manchester United" = "orange", | |
"Leicester" = "#b15928", | |
"Sheffield United" = "pink", | |
"Bournemouth" = "purple", | |
"Brighton" = "white", | |
"Other" = "grey") | |
## CLEANED data | |
EPL_situation_df <- EPL_shots_data_df_raw %>% | |
## pick out 3rd data frame which is game states | |
map(1) %>% | |
bind_rows(.id = "team_name") | |
EPL_situation_df_clean <- EPL_situation_df %>% | |
mutate(name = str_replace(name, "against.", "against_"), | |
team_name = str_replace(team_name, "_", " "), | |
team_name = as_factor(team_name)) %>% | |
separate(name, c("situation", "metric"), | |
sep = "\\.", extra = "merge") %>% | |
filter(metric != "stat") %>% | |
pivot_wider(names_from = "metric", values_from = "value") %>% | |
## create some more metrics | |
mutate(xGperShot = xG / shots, | |
xGAperShot = against_xG / against_shots) %>% | |
pivot_longer(-c("team_name", "situation"), | |
names_to = "metric", values_to = "value") %>% | |
pivot_wider(names_from = "situation", values_from = "value") | |
EPL_situation_percentile_df <- EPL_situation_df %>% | |
mutate(name = str_replace(name, "against.", "against_"), | |
team_name = str_replace(team_name, "_", " ")) %>% | |
separate(name, c("situation", "metric"), | |
sep = "\\.", extra = "merge") %>% | |
filter(metric != "stat") %>% | |
pivot_wider(names_from = "metric", values_from = "value") %>% | |
## create some more metrics | |
mutate(xGperShot = xG / shots, | |
xGAperShot = against_xG / against_shots) %>% | |
## normalize | |
group_by(situation) %>% | |
mutate_at(vars(c("against_shots", "against_goals", | |
"against_xG", "xGAperShot")), | |
~ rescale(., to = c(100, 0))) %>% | |
mutate_at(vars(c("shots", "goals", "xG", "xGperShot")), | |
~ rescale(., to = c(0, 100))) %>% | |
ungroup() %>% | |
## pivot back | |
pivot_longer(-c("team_name", "situation"), | |
names_to = "metric", values_to = "value") %>% | |
## percentile rank | |
group_by(metric, situation) %>% | |
mutate(percrank = rank(value) / length(value)) %>% | |
ungroup() %>% | |
select(-value) %>% | |
## pivot back | |
pivot_wider(names_from = "situation", values_from = "percrank") %>% | |
mutate(metric = case_when( | |
metric == "against_shots" ~ "Shots Against", | |
metric == "shots" ~ "Shots", | |
metric == "against_xG" ~ "xGA", | |
metric == "against_goals" ~ "Goals Against", | |
metric == "goals" ~ "Goals", | |
metric == "xG" ~ "xG", | |
metric == "xGperShot" ~ "xG per Shot", | |
metric == "xGAperShot" ~ "xGA per Shot", | |
TRUE ~ metric | |
)) %>% | |
## resort metric order | |
mutate(metric = as_factor(metric)) %>% | |
mutate(metric = fct_relevel(metric, "Shots Against", "Shots", | |
"Goals Against", "xGA", | |
"Goals", "xG")) | |
## Open Play: Attack | |
EPL_situation_percentile_df %>% | |
## team colors | |
mutate(team_name = case_when( | |
!team_name %in% c("Liverpool", "Tottenham", "Manchester City", | |
"Chelsea", "Arsenal", "Manchester United", | |
"Leicester", "Brighton") ~ "Other", | |
TRUE ~ team_name | |
)) %>% | |
mutate(team_name = as_factor(team_name)) %>% | |
filter(metric %in% c("Shots", "Goals", "xG", "xG per Shot")) %>% | |
ggplot(aes(x = metric)) + | |
geom_point(aes(y = OpenPlay, fill = team_name), | |
size = 6, stroke = 1.5, | |
color = "black", shape = 21, | |
position = position_jitterdodge(jitter.width = 0.6, | |
dodge.width = 0.8, | |
seed = 1892)) + | |
geom_vline(xintercept = 0.5) + | |
geom_vline(xintercept = 1.5) + | |
geom_vline(xintercept = 2.5) + | |
geom_vline(xintercept = 3.5) + | |
geom_vline(xintercept = 4.5) + | |
scale_fill_manual(values = fillvals, name = "Team") + | |
scale_y_continuous(labels = scales::percent_format(accuracy = 1), | |
breaks = c(0, 0.25, 0.5, 0.75, 1), | |
limits = c(0, 1)) + | |
labs(title = "Offensive Stats from Open Play", | |
subtitle = glue(" | |
Percentile Rank | |
As of Nov. 10, 2019 (Matchday 12)"), | |
caption = glue(" | |
Data: understat.com | |
Twitter: @R_by_Ryo"), | |
x = NULL, | |
y = "Percentile Rank") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 12), | |
axis.title = element_text(size = 16), | |
axis.text = element_text(size = 14, color = "black"), | |
legend.title = element_text(size = 16), | |
legend.text = element_text(size = 14)) | |
## Open Play: Defense | |
EPL_situation_percentile_df %>% | |
## team colors | |
mutate(team_name = case_when( | |
!team_name %in% c("Liverpool", "Tottenham", "Manchester City", | |
"Chelsea", "Arsenal", "Manchester United", | |
"Leicester", "Brighton") ~ "Other", | |
TRUE ~ team_name | |
)) %>% | |
mutate(team_name = as_factor(team_name)) %>% | |
filter(metric %in% c("Shots Against", "Goals Against", | |
"xGA", "xGA per Shot")) %>% | |
ggplot(aes(x = metric)) + | |
geom_point(aes(y = OpenPlay, fill = team_name), | |
size = 6, stroke = 1.5, | |
color = "black", shape = 21, | |
position = position_jitterdodge(jitter.width = 0.6, | |
dodge.width = 0.8, | |
seed = 1892)) + | |
geom_vline(xintercept = 0.5) + | |
geom_vline(xintercept = 1.5) + | |
geom_vline(xintercept = 2.5) + | |
geom_vline(xintercept = 3.5) + | |
geom_vline(xintercept = 4.5) + | |
scale_fill_manual(values = fillvals, name = "Team") + | |
scale_y_continuous(labels = scales::percent_format(accuracy = 1), | |
breaks = c(0, 0.25, 0.5, 0.75, 1), | |
limits = c(0, 1)) + | |
labs(title = "Defensive Stats from Open Play", | |
subtitle = glue(" | |
Percentile Rank | |
As of Nov. 10, 2019 (Matchday 12)"), | |
caption = glue(" | |
Data: understat.com | |
Twitter: @R_by_Ryo"), | |
x = NULL, | |
y = "Percentile Rank") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 12), | |
axis.title = element_text(size = 16), | |
axis.text = element_text(size = 14, color = "black"), | |
legend.title = element_text(size = 16), | |
legend.text = element_text(size = 14)) | |
## Setpiece ALL (Indirect free kick + corners) | |
EPL_situation_SetPieceAll_df <- EPL_situation_df_clean %>% | |
group_by(team_name, metric) %>% | |
mutate(SetPieceAll = FromCorner + SetPiece) %>% | |
ungroup() %>% | |
select(team_name, metric, SetPieceAll) %>% | |
pivot_wider(names_from = "metric", values_from = "SetPieceAll") %>% | |
## recalculate per shot values | |
group_by(team_name) %>% | |
mutate(xGperShot = xG / shots, | |
xGAperShot = against_xG / against_shots) %>% | |
ungroup() %>% | |
## back to other | |
mutate_at(vars(c("against_shots", "against_goals", | |
"against_xG", "xGAperShot")), | |
~ rescale(., to = c(100, 0))) %>% | |
mutate_at(vars(c("shots", "goals", "xG", "xGperShot")), | |
~ rescale(., to = c(0, 100))) %>% | |
## pivot back | |
pivot_longer(-c("team_name"), | |
names_to = "metric", values_to = "value") %>% | |
## percentile rank | |
group_by(metric) %>% | |
mutate(percrank = rank(value) / length(value)) %>% | |
ungroup() %>% | |
select(-value) %>% | |
## resort metric order | |
mutate(metric = case_when( | |
metric == "against_shots" ~ "Shots Against", | |
metric == "shots" ~ "Shots", | |
metric == "against_xG" ~ "xGA", | |
metric == "against_goals" ~ "Goals Against", | |
metric == "goals" ~ "Goals", | |
metric == "xG" ~ "xG", | |
metric == "xGperShot" ~ "xG per Shot", | |
metric == "xGAperShot" ~ "xGA per Shot", | |
TRUE ~ metric | |
)) %>% | |
## resort metric order | |
mutate(metric = as_factor(metric)) %>% | |
mutate(metric = fct_relevel(metric, "Shots Against", "Shots", | |
"Goals Against", "xGA", | |
"Goals", "xG")) | |
## Set piece: Attack | |
EPL_situation_SetPieceAll_df %>% | |
## team colors | |
mutate(team_name = as.character(team_name)) %>% | |
mutate(team_name = case_when( | |
!team_name %in% c("Liverpool", "Tottenham", "Manchester City", | |
"Chelsea", "Arsenal", "Manchester United", | |
"Leicester", "Bournemouth") ~ "Other", | |
TRUE ~ team_name | |
)) %>% | |
mutate(team_name = as_factor(team_name)) %>% | |
filter(metric %in% c("Shots", "Goals", "xG", "xG per Shot")) %>% | |
ggplot(aes(x = metric)) + | |
geom_point(aes(y = percrank, fill = team_name), | |
size = 6, stroke = 1.5, | |
color = "black", shape = 21, | |
position = position_jitterdodge(jitter.width = 0.6, | |
dodge.width = 0.8, | |
seed = 1892)) + | |
geom_vline(xintercept = 0.5) + | |
geom_vline(xintercept = 1.5) + | |
geom_vline(xintercept = 2.5) + | |
geom_vline(xintercept = 3.5) + | |
geom_vline(xintercept = 4.5) + | |
scale_fill_manual(values = fillvals, name = "Team") + | |
scale_y_continuous(labels = scales::percent_format(accuracy = 1), | |
breaks = c(0, 0.25, 0.5, 0.75, 1), | |
limits = c(0, 1)) + | |
labs(title = "Offensive Stats from Corners & Set Pieces", | |
subtitle = glue(" | |
Percentile Rank | Excluding Direct Free Kicks | |
As of Nov. 10, 2019 (Matchday 12)"), | |
caption = glue(" | |
Data: understat.com | |
Twitter: @R_by_Ryo"), | |
x = NULL, | |
y = "Percentile Rank") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 12), | |
axis.title = element_text(size = 16), | |
axis.text = element_text(size = 14, color = "black"), | |
legend.title = element_text(size = 16), | |
legend.text = element_text(size = 14)) | |
## Set piece: Defense | |
EPL_situation_SetPieceAll_df %>% | |
mutate(team_name = as.character(team_name)) %>% | |
## team colors | |
mutate(team_name = case_when( | |
!team_name %in% c("Liverpool", "Tottenham", "Manchester City", | |
"Chelsea", "Arsenal", "Manchester United", | |
"Leicester", "Bournemouth") ~ "Other", | |
TRUE ~ team_name | |
)) %>% | |
mutate(team_name = as_factor(team_name)) %>% | |
filter(metric %in% c("Shots Against", "Goals Against", | |
"xGA", "xGA per Shot")) %>% | |
ggplot(aes(x = metric)) + | |
geom_point(aes(y = percrank, fill = team_name), | |
size = 6, stroke = 1.5, | |
color = "black", shape = 21, | |
position = position_jitterdodge(jitter.width = 0.6, | |
dodge.width = 0.8, | |
seed = 1892)) + | |
geom_vline(xintercept = 0.5) + | |
geom_vline(xintercept = 1.5) + | |
geom_vline(xintercept = 2.5) + | |
geom_vline(xintercept = 3.5) + | |
geom_vline(xintercept = 4.5) + | |
scale_fill_manual(values = fillvals, name = "Team") + | |
scale_y_continuous(labels = scales::percent_format(accuracy = 1), | |
breaks = c(0, 0.25, 0.5, 0.75, 1), | |
limits = c(0, 1)) + | |
labs(title = "Defensive Stats from Corners & Set Pieces", | |
subtitle = glue(" | |
Percentile Rank | Excluding Direct Free Kicks | |
As of Nov. 10, 2019 (Matchday 12)"), | |
caption = glue(" | |
Data: understat.com | |
Twitter: @R_by_Ryo"), | |
x = NULL, | |
y = "Percentile Rank") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 12), | |
axis.title = element_text(size = 16), | |
axis.text = element_text(size = 14, color = "black"), | |
legend.title = element_text(size = 16), | |
legend.text = element_text(size = 14)) | |
## Corners | |
## Corners: Attack | |
EPL_situation_percentile_df %>% | |
## team colors | |
mutate(team_name = case_when( | |
!team_name %in% c("Liverpool", "Tottenham", "Manchester City", | |
"Chelsea", "Arsenal", "Manchester United", | |
"Leicester") ~ "Other", | |
TRUE ~ team_name | |
)) %>% | |
mutate(team_name = as_factor(team_name)) %>% | |
filter(metric %in% c("Shots", "Goals", "xG", "xG per Shot")) %>% | |
ggplot(aes(x = metric)) + | |
geom_point(aes(y = FromCorner, fill = team_name), | |
size = 6, stroke = 1.5, | |
color = "black", shape = 21, | |
position = position_jitterdodge(jitter.width = 0.6, | |
dodge.width = 0.8, | |
seed = 1892)) + | |
geom_vline(xintercept = 0.5) + | |
geom_vline(xintercept = 1.5) + | |
geom_vline(xintercept = 2.5) + | |
geom_vline(xintercept = 3.5) + | |
geom_vline(xintercept = 4.5) + | |
scale_fill_manual(values = fillvals, name = "Team") + | |
scale_y_continuous(labels = scales::percent_format(accuracy = 1), | |
breaks = c(0, 0.25, 0.5, 0.75, 1), | |
limits = c(0, 1)) + | |
labs(title = "Offensive Stats from Corner Kicks", | |
subtitle = glue(" | |
Percentile Rank | |
As of Nov. 10, 2019 (Matchday 12)"), | |
caption = glue(" | |
Data: understat.com | |
Twitter: @R_by_Ryo"), | |
x = NULL, | |
y = "Percentile Rank") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 12), | |
axis.title = element_text(size = 16), | |
axis.text = element_text(size = 14, color = "black"), | |
legend.title = element_text(size = 16), | |
legend.text = element_text(size = 14)) | |
## Corners: Defense | |
EPL_situation_percentile_df %>% | |
## team colors | |
# mutate(team_name = case_when( | |
# !team_name %in% c("Liverpool", "Tottenham", "Manchester City", | |
# "Chelsea", "Arsenal", "Manchester United", | |
# "Leicester", "Brighton") ~ "Other", | |
# TRUE ~ team_name | |
# )) %>% | |
mutate(team_name = as_factor(team_name)) %>% | |
filter(metric %in% c("shots", "goals", "xG", "xGperShot")) %>% | |
ggplot(aes(x = metric)) + | |
geom_point(aes(y = FromCorner, fill = team_name), | |
size = 6, stroke = 1.5, | |
color = "black", shape = 21, | |
position = position_jitterdodge(jitter.width = 0.6, | |
dodge.width = 0.8, | |
seed = 1892)) + | |
geom_vline(xintercept = 0.5) + | |
geom_vline(xintercept = 1.5) + | |
geom_vline(xintercept = 2.5) + | |
geom_vline(xintercept = 3.5) + | |
geom_vline(xintercept = 4.5) + | |
# scale_fill_manual(values = fillvals, name = "Team") + | |
scale_fill_manual(values = pals::glasbey(20), name = "Team") + | |
scale_y_continuous(labels = scales::percent_format(accuracy = 1), | |
breaks = c(0, 0.25, 0.5, 0.75, 1), | |
limits = c(0, 1)) + | |
labs(title = "Offensive Stats from Corner Kicks", | |
subtitle = glue(" | |
Percentile Rank | |
As of Nov. 10, 2019 (Matchday 12)"), | |
caption = glue(" | |
Data: understat.com | |
Twitter: @R_by_Ryo"), | |
x = NULL, | |
y = "Percentile Rank") + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed"), | |
plot.title = element_text(size = 20), | |
plot.subtitle = element_text(size = 18), | |
plot.caption = element_text(size = 12), | |
axis.title = element_text(size = 16), | |
axis.text = element_text(size = 14, color = "black"), | |
legend.title = element_text(size = 16), | |
legend.text = element_text(size = 14)) | |
## % of xG from situations | |
EPL_situation_OpenPlay_And_All_df <- EPL_situation_df_clean %>% | |
mutate(Penalty = if_else(is.na(Penalty), 0, Penalty)) %>% | |
group_by(team_name, metric) %>% | |
mutate(all = sum(OpenPlay + DirectFreekick + FromCorner, SetPiece), | |
openplay_perc = OpenPlay / all, | |
corner_perc = FromCorner / all, | |
half_corner = corner_perc / 2, | |
half = openplay_perc / 2) %>% | |
ungroup() %>% | |
select(team_name, metric, openplay_perc, half, | |
corner_perc, half_corner) | |
## % of xG from corner kicks | |
EPL_situation_OpenPlay_And_All_df %>% | |
filter(metric == "xG") %>% | |
ggplot(aes(x = fct_reorder(team_name, corner_perc), | |
y = corner_perc)) + | |
geom_col(color = "black", fill = "#00ff85") + | |
geom_text(aes(y = half_corner, | |
label = percent_format(accuracy = 0.1) (corner_perc)), | |
color = "#38003c", size = 5.5, fontface = "bold", | |
family = "Roboto Condensed") + | |
scale_y_continuous(expand = c(0.01, 0), | |
labels = scales::percent_format(accuracy = 2)) + | |
theme_minimal() + | |
theme(text = element_text(family = "Roboto Condensed", | |
color = "white"), | |
title = element_text(size = 18), | |
axis.text = element_text(color = "white", size = 14), | |
plot.background = element_rect(fill = "#38003c")) + | |
labs(title = "Percentage of Total non-Penalty xG from Corner Kicks", | |
subtitle = "As of Nov. 10, 2019 (Matchday 12)", | |
x = NULL, y = "Percentage of Total non-Penalty xG from Corner Kicks", | |
caption = glue("Twitter: @R_by_Ryo Data: understat.com")) + | |
coord_flip() | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment