Skip to content

Instantly share code, notes, and snippets.

@apoorvalal
Last active December 18, 2021 21:06
Show Gist options
  • Save apoorvalal/ac494a35282155bdc92899d14c01c193 to your computer and use it in GitHub Desktop.
Save apoorvalal/ac494a35282155bdc92899d14c01c193 to your computer and use it in GitHub Desktop.
Construct measures of the competitiveness of European leagues using match-level data from https://github.com/jalapic/engsoccerdata
# %% ####################################################
# Clear the global workspace. The later `mget(ls(pattern = ...))` step gathers
# global objects by name, so leftovers from a previous session would be
# picked up if this were skipped.
rm(list = ls())

# Attach LalRUtils, then hand the remaining packages to its libreq() helper
# (presumably installs/attaches them -- confirm against LalRUtils).
library(LalRUtils)
LalRUtils::libreq(
  ggplot2, data.table, fixest, magrittr,
  patchwork, IRdisplay, did, panelView, plotly
)

# Global plotting defaults: theme, notebook figure size, colour scales.
theme_set(lal_plot_theme())
options(
  repr.plot.width = 12,
  repr.plot.height = 9,
  ggplot2.discrete.fill = RColorBrewer::brewer.pal(7, "Set2"),
  ggplot2.discrete.colour = RColorBrewer::brewer.pal(7, "Set2"),
  ggplot2.continuous.fill = "viridis",
  ggplot2.continuous.colour = "viridis"
)
set.seed(42)

# Convert the arguments to character and pass the result to display_html().
chr = function(...) {
  display_html(as.character(...))
}
# %% ####################################################
# Directory containing the per-country .rda match files (relative path).
data = "../data/engsoccerdata-master/data"
print(list.files(data))
# %%
# Read each country's match table and convert it to a data.table in place.
# NOTE(review): `import` is not defined in this file; presumably rio::import
# made available through LalRUtils -- confirm.
bel = setDT(import(file.path(data, "belgium.rda")))
eng = setDT(import(file.path(data, "england.rda")))
fra = setDT(import(file.path(data, "france.rda")))
ger = setDT(import(file.path(data, "germany.rda")))
hol = setDT(import(file.path(data, "holland.rda")))
ita = setDT(import(file.path(data, "italy.rda")))
esp = setDT(import(file.path(data, "spain.rda")))
por = setDT(import(file.path(data, "portugal.rda")))
# %%
# Reshape a match-level table (one row per match) into a team-level table
# (two rows per match: one for the home side, one for the visitor).
#
# Args:
#   df: data.table with columns `home`, `visitor`, `hgoal`, `vgoal` and
#       `Season`; any other columns are carried along as id variables.
#
# Returns:
#   A new data.table ordered by Season and match id with added columns
#   `id` (row index of the match in `df`), `RESULT` ("HW", "VW", "draw", or
#   NA when a score is missing), `type` ("home"/"visitor"), `team`,
#   `goals` (goals scored by that team) and `points` (3 win / 1 draw / 0).
#   The input table is left untouched.
long_matches = function(df){
  # `:=` modifies by reference; copy first so the caller's table does not
  # silently gain `id` and `RESULT` columns.
  df = copy(df)
  df[, id := .I] # create match id
  # Spell out the draw condition instead of using it as the fallback:
  # a match with a missing score must not be counted as a draw.
  df[, RESULT := fcase(
    hgoal > vgoal, "HW",
    hgoal < vgoal, "VW",
    hgoal == vgoal, "draw"
  )]
  ids = setdiff(colnames(df), c("home", "visitor"))
  matches_long = melt(
    df,
    id.vars = ids,
    measure.vars = c("home", "visitor"),
    variable.name = "type",
    value.name = "team"
  )
  matches_long[, goals := ifelse(type == "home", hgoal, vgoal)]
  # Rows whose RESULT is NA match no condition and fall through to 0 points.
  matches_long[, points := fcase(
    type == "home" & RESULT == "HW", 3,
    type == "visitor" & RESULT == "VW", 3,
    RESULT == "draw", 1,
    default = 0
  )]
  setorder(matches_long, Season, id)
  matches_long
}
# Summarise how evenly points are spread across teams in each season.
#
# Args:
#   df: team-level data.table with columns `Season`, `team` and `points`
#       (one row per team per match, e.g. the output of long_matches()).
#       It is only read, never modified.
#
# Returns:
#   A data.table with one row per Season and columns
#     margin: points of the top-ranked team minus points of the next team
#             in the ranking (NA when a season has a single team),
#     hhi:    sum of squared percentage point shares,
#     enop:   1 / sum of squared (fractional) point shares.
competition_index = function(df){
  # Aggregate directly instead of first writing a `season_total` column into
  # the caller's table by reference.
  team_tallies = df[, .(total_points = sum(points)), by = .(Season, team)]
  # Rank within season, highest tally first; tied teams share the floor of
  # their average rank.
  team_tallies[, rk := floor(frank(-total_points)), by = Season]
  setorder(team_tallies, Season, rk)
  # data.table's own shift() rather than lead(), which neither data.table
  # nor base R defines.
  team_tallies[,
    margin := total_points - shift(total_points, type = "lead"),
    by = Season
  ]
  team_tallies[, share_points := total_points / sum(total_points), by = Season]
  # After the sort, the first row of each season is the top-ranked team, so
  # margin[1] is the winner's lead.
  summ_seasons = team_tallies[, .(
    margin = margin[1],
    hhi = sum((share_points * 100)^2),
    enop = 1 / sum(share_points^2)
  ), by = Season]
  summ_seasons
}
# %%
# Season-level competitiveness summary per country: filter the match table,
# reshape it to team rows, then compute the indices.
# The tables label the league level differently (`division` vs `tier`);
# each filter presumably selects the top division -- confirm against the data.
eng_comp = competition_index(long_matches(eng[division == 1]))
fra_comp = competition_index(long_matches(fra[division == 1]))
ger_comp = competition_index(long_matches(ger[division == 1]))
hol_comp = competition_index(long_matches(hol[tier == 1]))
ita_comp = competition_index(long_matches(ita[tier == 1]))
esp_comp = competition_index(long_matches(esp[tier == 1 & round == "league"]))
por_comp = competition_index(long_matches(por[division == "P1"]))
# %%
# Stack every global object whose name ends in "_comp" into one table.
# `ls(pattern =)` takes a regular expression, so the suffix is anchored with
# "$" rather than written as the glob "*_comp".
all_leagues = rbindlist(mget(ls(pattern = "_comp$")), idcol = TRUE)
# Derive the country code from the object name ("eng_comp" -> "eng") so that
# a newly added league is labelled automatically instead of falling through
# to an empty string in a hand-maintained lookup.
all_leagues[, country := sub("_comp$", "", .id)][, .id := NULL]
# %% figures
# Left panel: title-winning margin by season (1950 onward), one colour and
# one smoothed trend per country.
f1 = ggplot(
  all_leagues[Season>=1950],
  aes(Season, margin, group = country, colour = country)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_color_brewer(palette = "Set3") +
  lal_plot_theme_d() +
  ggtitle("Title Winning Margin")
# Right panel: same layout for the HHI of season point tallies.
f2 = ggplot(
  all_leagues[Season>=1950],
  aes(Season, hhi, group = country, colour = country)
) +
  geom_point() +
  geom_smooth(se = FALSE) +
  scale_color_brewer(palette = "Set3") +
  lal_plot_theme_d() +
  ggtitle("HHI of Points Tallies")
# %%
# Place the two panels side by side and write the combined figure to disk.
f = f1 | f2
ggsave("../figs/competitiveness.png", plot = f, width = 15, height = 10)
# %%
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment