Last active
April 9, 2021 12:08
-
-
Save tonyelhabr/cf0532ac9631388f9f39cf6be4ee8d33 to your computer and use it in GitHub Desktop.
Bin soccer event data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
# general helper function | |
retrieve_sb_events <- | |
function(competition_id, | |
..., | |
clean = TRUE, | |
export = TRUE, | |
dir = 'data', | |
ext = 'rds', | |
file = glue::glue('events_{competition_id}{ifelse(clean, "_clean", "")}.{ext}'), | |
path = file.path(dir, file), | |
overwrite = FALSE, | |
f_import = rio::import, | |
f_export = rio::export) { | |
path_exists <- fs::file_exists(path) | |
if(path_exists & !overwrite) { | |
return(f_import(path, ...)) | |
} | |
comps <- StatsBombR::FreeCompetitions() | |
matches <- | |
comps %>% | |
filter(competition_id == !!competition_id) %>% | |
StatsBombR::FreeMatches() %>% | |
arrange(match_date) | |
events <- matches %>% StatsBombR::StatsBombFreeEvents() | |
if(clean) { | |
events <- events %>% StatsBombR::allclean() | |
} | |
f_export(events, path) | |
events | |
} | |
events <- | |
retrieve_sb_events(competition_id = 43, overwrite = FALSE) %>% | |
select(player_id = player.id, x = location.x, y = location.y) | |
events %>% mutate(across(player_id, factor)) %>% skimr::skim() | |
# This is just for illustration. Probably should import your custom rectangles from a CSV | |
blocks <- | |
tibble( | |
x_min = rep(c(0, 30, 60, 90), 2), | |
x_max = rep(c(30, 60, 90, 120), 2), | |
y_min = c(rep(0, 4), rep(40, 4)), | |
y_max = c(rep(40, 4), rep(80, 4)) | |
) %>% | |
mutate(idx = row_number()) | |
blocks | |
# quick check | |
blocks %>% | |
ggplot() + | |
ggsoccer::annotate_pitch( | |
dimension = ggsoccer::pitch_statsbomb, | |
fill = 'white', | |
colour = 'black', | |
limits = FALSE | |
) + | |
geom_rect( | |
aes(xmin = x_min, ymin = y_min, xmax = x_max, ymax = y_max), | |
fill = NA, | |
color = 'black', | |
alpha = 0.1 | |
) + | |
geom_text( | |
aes(x = (x_min + x_max) / 2, y = (y_min + y_max) / 2, label = idx) | |
) + | |
coord_fixed(ratio = 2 / 3) + | |
theme_void() | |
# There are other ways to non-equi joins, but I've found this works great even with a more granular grid of cells. | |
events_dt <- events %>% drop_na() %>% data.table::as.data.table() | |
blocks_dt <- blocks %>% data.table::as.data.table() | |
events_binned <- | |
events_dt[blocks_dt, on=.(x > x_min, x <= x_max, y >= y_min, y < y_max)] %>% | |
as_tibble() %>% | |
select(player_id, idx, x, y) %>% | |
drop_na() | |
events_binned | |
# You could filter for your specific player before this to avoid some extra calculations, but we filter later here. | |
grid_players <- | |
blocks %>% | |
mutate(dummy = 0L) %>% | |
# Cartesian join of all possible cells in the grid and all players in `events`. | |
full_join( | |
events %>% | |
drop_na() %>% | |
distinct(player_id) %>% | |
mutate(dummy = 0L), | |
by = 'dummy' | |
) | |
grid_players %>% skimr::skim() | |
players <- | |
events_binned %>% | |
group_by(player_id, idx) %>% | |
summarize(n = n()) %>% | |
ungroup() %>% | |
full_join(grid_players) %>% | |
select(-dummy) %>% | |
replace_na(list(n = 0L)) %>% | |
arrange(player_id) | |
players | |
player_id_messi <- 5503L | |
messi <- | |
players %>% | |
filter(player_id == player_id_messi) | |
messi | |
p <- | |
messi %>% | |
ggplot() + | |
ggsoccer::annotate_pitch( | |
dimension = ggsoccer::pitch_statsbomb, | |
fill = 'white', | |
colour = 'black', | |
limits = FALSE | |
) + | |
coord_fixed(ratio = 2 / 3, clip = 'off') + | |
theme_void() + | |
geom_rect( | |
show.legend = FALSE, | |
aes(xmin = x_min, ymin = y_min, xmax = x_max, ymax = y_max, fill = n), | |
alpha = 0.5 | |
) + | |
geom_text( | |
size = 6, | |
fontface = 'bold', | |
aes(x = 0.5 * (x_min + x_max), y = 0.5 * (y_min + y_max), label = n), | |
hjust = 0.5, | |
vjust = 0.5 | |
) + | |
scale_fill_distiller(palette = 'Reds', direction = 1) + | |
theme(plot.title.position = 'plot', plot.title = element_text(hjust = 0.5, face = 'bold')) + | |
labs(title = 'Messi\'s Touches') | |
p | |
ggsave( | |
plot = p, | |
filename = file.path('data', 'example.png'), | |
width = 6, | |
height = 6 * 2 / 3, | |
type = 'cairo' | |
) |
Author
tonyelhabr
commented
Apr 9, 2021
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment