Skip to content

Instantly share code, notes, and snippets.

@tonyelhabr
Last active April 9, 2021 12:08
Show Gist options
  • Save tonyelhabr/cf0532ac9631388f9f39cf6be4ee8d33 to your computer and use it in GitHub Desktop.
Save tonyelhabr/cf0532ac9631388f9f39cf6be4ee8d33 to your computer and use it in GitHub Desktop.
Bin soccer event data.
library(tidyverse)
# general helper function
retrieve_sb_events <-
function(competition_id,
...,
clean = TRUE,
export = TRUE,
dir = 'data',
ext = 'rds',
file = glue::glue('events_{competition_id}{ifelse(clean, "_clean", "")}.{ext}'),
path = file.path(dir, file),
overwrite = FALSE,
f_import = rio::import,
f_export = rio::export) {
path_exists <- fs::file_exists(path)
if(path_exists & !overwrite) {
return(f_import(path, ...))
}
comps <- StatsBombR::FreeCompetitions()
matches <-
comps %>%
filter(competition_id == !!competition_id) %>%
StatsBombR::FreeMatches() %>%
arrange(match_date)
events <- matches %>% StatsBombR::StatsBombFreeEvents()
if(clean) {
events <- events %>% StatsBombR::allclean()
}
f_export(events, path)
events
}
events <-
retrieve_sb_events(competition_id = 43, overwrite = FALSE) %>%
select(player_id = player.id, x = location.x, y = location.y)
events %>% mutate(across(player_id, factor)) %>% skimr::skim()
# This is just for illustration. Probably should import your custom rectangles from a CSV
blocks <-
tibble(
x_min = rep(c(0, 30, 60, 90), 2),
x_max = rep(c(30, 60, 90, 120), 2),
y_min = c(rep(0, 4), rep(40, 4)),
y_max = c(rep(40, 4), rep(80, 4))
) %>%
mutate(idx = row_number())
blocks
# quick check
blocks %>%
ggplot() +
ggsoccer::annotate_pitch(
dimension = ggsoccer::pitch_statsbomb,
fill = 'white',
colour = 'black',
limits = FALSE
) +
geom_rect(
aes(xmin = x_min, ymin = y_min, xmax = x_max, ymax = y_max),
fill = NA,
color = 'black',
alpha = 0.1
) +
geom_text(
aes(x = (x_min + x_max) / 2, y = (y_min + y_max) / 2, label = idx)
) +
coord_fixed(ratio = 2 / 3) +
theme_void()
# There are other ways to non-equi joins, but I've found this works great even with a more granular grid of cells.
events_dt <- events %>% drop_na() %>% data.table::as.data.table()
blocks_dt <- blocks %>% data.table::as.data.table()
events_binned <-
events_dt[blocks_dt, on=.(x > x_min, x <= x_max, y >= y_min, y < y_max)] %>%
as_tibble() %>%
select(player_id, idx, x, y) %>%
drop_na()
events_binned
# You could filter for your specific player before this to avoid some extra calculations, but we filter later here.
grid_players <-
blocks %>%
mutate(dummy = 0L) %>%
# Cartesian join of all possible cells in the grid and all players in `events`.
full_join(
events %>%
drop_na() %>%
distinct(player_id) %>%
mutate(dummy = 0L),
by = 'dummy'
)
grid_players %>% skimr::skim()
players <-
events_binned %>%
group_by(player_id, idx) %>%
summarize(n = n()) %>%
ungroup() %>%
full_join(grid_players) %>%
select(-dummy) %>%
replace_na(list(n = 0L)) %>%
arrange(player_id)
players
player_id_messi <- 5503L
messi <-
players %>%
filter(player_id == player_id_messi)
messi
p <-
messi %>%
ggplot() +
ggsoccer::annotate_pitch(
dimension = ggsoccer::pitch_statsbomb,
fill = 'white',
colour = 'black',
limits = FALSE
) +
coord_fixed(ratio = 2 / 3, clip = 'off') +
theme_void() +
geom_rect(
show.legend = FALSE,
aes(xmin = x_min, ymin = y_min, xmax = x_max, ymax = y_max, fill = n),
alpha = 0.5
) +
geom_text(
size = 6,
fontface = 'bold',
aes(x = 0.5 * (x_min + x_max), y = 0.5 * (y_min + y_max), label = n),
hjust = 0.5,
vjust = 0.5
) +
scale_fill_distiller(palette = 'Reds', direction = 1) +
theme(plot.title.position = 'plot', plot.title = element_text(hjust = 0.5, face = 'bold')) +
labs(title = 'Messi\'s Touches')
p
ggsave(
plot = p,
filename = file.path('data', 'example.png'),
width = 6,
height = 6 * 2 / 3,
type = 'cairo'
)
@tonyelhabr
Copy link
Author

example

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment