Last active
March 13, 2019 09:57
-
-
Save JoGall/b4d15a047152ddc2eaf5bf6b9408e859 to your computer and use it in GitHub Desktop.
Possession-adjusted metrics for StatsBomb data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compute total minutes played per player -------------------------------

#' Minutes played by each player in each match
#'
#' Builds one row per player appearance: starters are on the pitch from time
#' 0, substitutes from the time of the "Substitution" event that names them as
#' the replacement. An appearance ends when the player is substituted off, or
#' otherwise at the last `ElapsedTime` recorded for the match.
#'
#' @param dat Event-level data frame with the columns `match_id`,
#'   `team.name`, `player.name`, `type.name`,
#'   `substitution.replacement.name` and `ElapsedTime`. `ElapsedTime` is
#'   assumed to be in seconds, since it is divided by 60 to obtain minutes.
#' @return An ungrouped tibble with the columns `match_id`, `team.name`,
#'   `player.name`, `start`, `end` (both on the `ElapsedTime` scale) and
#'   `mins`.
statsbombMinsPlayed <- function(dat) {
  # Last recorded timestamp of each match: the default end of an appearance.
  dat <- dat %>%
    group_by(match_id) %>%
    mutate(end = max(ElapsedTime)) %>%
    ungroup()

  substitutions <- dat %>%
    filter(type.name == "Substitution")

  # Players coming on: they start at the time of the substitution event.
  replacements <- substitutions %>%
    select(
      match_id, team.name,
      player.name = substitution.replacement.name,
      end,
      start = ElapsedTime
    )

  # Players going off: their appearance ends at the substitution event.
  departures <- substitutions %>%
    select(match_id, team.name, player.name, sub_off = ElapsedTime)

  # Starting XI: the (up to) 22 players per match with the earliest first
  # event. Replacements are removed first, because a player who came on as a
  # substitute cannot also have started; otherwise they could be counted
  # twice (once from 0 and once from their substitution time).
  starters <- dat %>%
    group_by(match_id, player.name) %>%
    slice(1) %>%
    ungroup() %>%
    select(match_id, team.name, player.name, ElapsedTime, end) %>%
    na.omit() %>%
    anti_join(replacements, by = c("match_id", "player.name")) %>%
    group_by(match_id) %>%
    # row_number() breaks ties, so a match never yields more than 22 starters
    filter(row_number(ElapsedTime) <= 22) %>%
    ungroup() %>%
    mutate(start = 0) %>%
    select(-ElapsedTime)

  bind_rows(starters, replacements) %>%
    left_join(departures, by = c("match_id", "team.name", "player.name")) %>%
    mutate(
      # substituted off -> end at that event; otherwise end of the match
      end = coalesce(sub_off, end),
      mins = (end - start) / 60
    ) %>%
    select(match_id, team.name, player.name, start, end, mins)
}
# Get opposition time in possession for each player -------------------------------

# Event-level data. The steps below read the columns match_id, possession,
# possession_team.name, ElapsedTime and TimeInPoss from it.
d <- readRDS("StatsBomb_data.Rdata")

# minutes played per player per match
minutes <- statsbombMinsPlayed(d)

# cumulative time in possession by time elapsed per team
# One row per possession: the time at which it ended (max ElapsedTime), how
# long it lasted (max TimeInPoss) and the team that had the ball.
poss <- d %>%
  group_by(match_id, possession) %>%
  summarise(
    ElapsedTime = max(ElapsedTime),
    TimeInPoss = max(TimeInPoss),
    team.name = possession_team.name[1]
  ) %>%
  ungroup() %>%
  # The running total must restart for every match and every team; an
  # ungrouped cumsum() would mix both teams' possessions across all matches.
  arrange(match_id, possession) %>%
  group_by(match_id, team.name) %>%
  mutate(CumTimeInPoss = cumsum(TimeInPoss)) %>%
  ungroup()
# cumulative opponent time in possession per player's playing time per match
# For every appearance in `minutes`, add up the duration of the opponent's
# possessions that ended while the player was on the pitch. Summing
# TimeInPoss directly (rather than differencing a running total) counts the
# first possession in the window too, and gives 0 instead of -Inf when the
# opponent had no possession in that window.
OppPoss <- lapply(seq_len(nrow(minutes)), function(i) {
  ss <- minutes[i, ]
  opp_time <- poss %>%
    filter(
      match_id == ss$match_id,
      ElapsedTime >= ss$start,
      ElapsedTime <= ss$end,
      team.name != ss$team.name
    ) %>%
    summarise(OppPoss = sum(TimeInPoss))
  ss %>%
    select(-start, -end) %>%
    mutate(OppPoss = opp_time$OppPoss)
}) %>%
  dplyr::bind_rows()

# summarise opponent playing time per player
# Players are pooled by name across matches; anyone without any opponent
# possession time is dropped so it cannot be used as a zero denominator.
OppPoss <- OppPoss %>%
  group_by(player.name) %>%
  summarise(OppPoss = sum(OppPoss)) %>%
  filter(OppPoss > 0)
# e.g. Padj tackles and interceptions -----------------------------------------

# all tackles and interceptions per player
# Counts, per team and player, the tackles (duels of type "Tackle") and
# interceptions whose outcome is one of the successful labels below.
tackint <- local({
  successful_outcomes <- c("Success In Play", "Success Out", "Won")
  d %>%
    filter(
      (type.name == "Duel" &
         duel.type.name == "Tackle" &
         duel.outcome.name %in% successful_outcomes) |
        (type.name == "Interception" &
           interception.outcome.name %in% successful_outcomes)
    ) %>%
    group_by(team.name, player.name) %>%
    summarise(n = n())
})

# padj, here per 30 minutes opponent possession (around the average duration of any team's possession per match)
# n / OppPoss is a rate per unit of opponent possession; * 60 * 30 rescales
# it to 30 minutes (presumably OppPoss is in seconds -- confirm upstream).
ti30 <- tackint %>%
  left_join(OppPoss[, c("player.name", "OppPoss")], by = "player.name") %>%
  mutate(ti30 = n / OppPoss * 60 * 30) %>%
  arrange(ti30)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment