Skip to content

Instantly share code, notes, and snippets.

@JoGall
Last active March 13, 2019 09:57
Show Gist options
  • Save JoGall/b4d15a047152ddc2eaf5bf6b9408e859 to your computer and use it in GitHub Desktop.
Save JoGall/b4d15a047152ddc2eaf5bf6b9408e859 to your computer and use it in GitHub Desktop.
Possession-adjusted metrics for StatsBomb data
# Compute total minutes played per player -------------------------------

# Work out when each player entered and left the pitch in every match.
#
# Args:
#   dat: event-level data frame with (at least) the columns match_id,
#        team.name, player.name, type.name, ElapsedTime and
#        substitution.replacement.name. ElapsedTime is treated as seconds
#        (it is divided by 60 to obtain minutes).
#
# Returns:
#   One row per player per match with columns match_id, team.name,
#   player.name, start, end (both in ElapsedTime units) and mins.
#   The result keeps the match_id grouping produced by the pipeline.
statsbombMinsPlayed <- function(dat) {
  # stamp every event with the time of the last event of its match
  dat <- dat %>%
    group_by(match_id) %>%
    mutate(end = max(ElapsedTime))

  # starting XI: the 22 players per match whose first recorded event is
  # earliest; they are assumed to have been on the pitch from time 0.
  # NOTE(review): top_n() keeps ties, so more than 22 rows per match are
  # possible when first-event times are tied at the cut-off.
  starters <- dat %>%
    group_by(match_id, player.name) %>%
    filter(row_number() == 1) %>%
    select(match_id, team.name, player.name, ElapsedTime, end) %>%
    na.omit() %>%
    group_by(match_id) %>%
    top_n(22, -ElapsedTime) %>%
    mutate(start = 0) %>%
    select(-ElapsedTime)

  # every substitution event yields one player coming on and one going off
  subs <- dat %>%
    filter(type.name == "Substitution")

  # substitutes start at the time of the substitution event
  subs_on <- subs %>%
    select(
      match_id,
      team.name,
      player.name = substitution.replacement.name,
      end,
      start = ElapsedTime
    )

  # replaced players finish at the time of the substitution event
  subs_off <- subs %>%
    select(match_id, team.name, player.name, end = ElapsedTime)

  # bind_rows() (rather than base rbind()) rebuilds the grouping metadata of
  # the grouped tibbles correctly
  minutes <- bind_rows(starters, subs_on) %>%
    left_join(subs_off, by = c("match_id", "team.name", "player.name"))

  # end.x is the end of the match, end.y the time the player was taken off
  # (NA if never substituted): use the substitution time when there is one
  minutes %>%
    mutate(
      end = coalesce(end.y, end.x),
      mins = (end - start) / 60
    ) %>%
    select(-end.x, -end.y)
}
# Get opposition time in possession for each player -------------------------------
d <- readRDS("StatsBomb_data.Rdata")

# minutes played per player per match
minutes <- statsbombMinsPlayed(d)

# one row per possession: the time it ended, how long it lasted and which
# team had the ball
poss <- d %>%
  group_by(match_id, possession) %>%
  summarise(
    ElapsedTime = max(ElapsedTime),
    TimeInPoss = max(TimeInPoss),
    team.name = possession_team.name[1]
  ) %>%
  ungroup() %>%
  # running total over ALL possessions (both teams, all matches); kept for
  # backward compatibility but not used below, because differencing it over
  # the opponent's rows would also count the player's own team's possession
  mutate(CumTimeInPoss = cumsum(TimeInPoss))

# opponent time in possession while each player was on the pitch, per match
OppPoss <- lapply(seq_len(nrow(minutes)), function(i) {
  ss <- minutes[i, ]

  # opponent possessions that ended while this player was on the pitch;
  # summing their durations gives 0 (not -Inf) when there are none
  opp_time <- poss %>%
    filter(
      match_id == ss$match_id,
      ElapsedTime >= ss$start,
      ElapsedTime <= ss$end,
      team.name != ss$team.name
    ) %>%
    summarise(OppPoss = sum(TimeInPoss, na.rm = TRUE))

  ss %>%
    select(-start, -end) %>%
    mutate(OppPoss = opp_time$OppPoss)
}) %>%
  bind_rows()

# summarise opponent time in possession per player across all matches
# NOTE(review): players are keyed by name only, so two different players
# sharing a name would be merged -- confirm this cannot happen in the data
OppPoss <- OppPoss %>%
  group_by(player.name) %>%
  summarise(OppPoss = sum(OppPoss)) %>%
  filter(OppPoss > 0)
# e.g. Padj tackles and interceptions -----------------------------------------

# count of successful tackles and interceptions for every player
tackint <- d %>%
  filter(
    (
      type.name == "Duel" &
        duel.type.name == "Tackle" &
        duel.outcome.name %in% c("Success In Play", "Success Out", "Won")
    ) |
      (
        type.name == "Interception" &
          interception.outcome.name %in% c("Success In Play", "Success Out", "Won")
      )
  ) %>%
  group_by(team.name, player.name) %>%
  summarise(n = n())

# possession-adjusted rate: actions per 30 minutes of opponent possession
# (30 minutes is roughly what any team has the ball for in a match);
# OppPoss is divided out and the result scaled by 60 * 30
ti30 <- tackint %>%
  left_join(
    select(OppPoss, player.name, OppPoss),
    by = "player.name"
  ) %>%
  mutate(ti30 = n / OppPoss * 60 * 30) %>%
  arrange(ti30)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment