Skip to content

Instantly share code, notes, and snippets.

@solmos
Created April 23, 2020 15:55
Show Gist options
  • Save solmos/659cd030dcfee01091968ef5e849ece7 to your computer and use it in GitHub Desktop.
Save solmos/659cd030dcfee01091968ef5e849ece7 to your computer and use it in GitHub Desktop.
## Scorekeeper bias in LEB Oro
library(tidyverse)
# Location of the LEB Oro assists CSV (downloaded from GitHub).
data_file <- "https://github.com/solmos/feb-data/raw/master/assists-leb-oro.csv"
# Read the CSV; the rest of the script uses the columns season, team,
# player, home, game_code, m_fg and ast.
assists_leb_oro <- read_csv(file = data_file)
# Players -----------------------------------------------------------------
# Assist total and mean for every season/team/player combination,
# computed separately for each value of `home`.
ast_player <- assists_leb_oro %>%
  group_by(season, team, player, home) %>%
  summarise(ast_total = sum(ast), ast_avg = mean(ast)) %>%
  ungroup()
# Home vs. away assists, one row per season/team/player.
# Only rows whose combined total exceeds 100 assists are kept.
ast_player_dif <- ast_player %>%
  # Label `home` so the widened columns get the suffixes `_home` / `_away`.
  mutate(home = ifelse(home, "home", "away")) %>%
  pivot_wider(
    id_cols = c(season, team, player),
    names_from = home,
    values_from = c(ast_total, ast_avg)
  ) %>%
  mutate(
    ast_total = ast_total_home + ast_total_away,
    ast_total_dif = ast_total_home - ast_total_away,
    ast_total_ratio = ast_total_home / ast_total_away
  ) %>%
  # If the home or the away side is missing, ast_total is NA and filter()
  # drops the row, because it only keeps rows where the condition is TRUE.
  filter(ast_total > 100)
# Share of the selected player-seasons with more assists at home than away.
# The value is a proportion between 0 and 1 (not multiplied by 100); the
# mean of a logical vector is the fraction of TRUE values.
mean(ast_player_dif$ast_total_dif > 0)
# Selected player-seasons ranked by home/away assist ratio (largest first),
# with a display name rebuilt from `player`.
# NOTE(review): the patterns look written for "LAST, FIRST" in capitals;
# confirm against the data. Each pattern stops at the first character
# outside A-Z / À-ÿ, so only one word per name part is kept.
players_table <- ast_player_dif %>%
  mutate(
    last_name = str_extract(player, "[A-ZÀ-ÿ]+"),
    first_name = player %>%
      str_extract(", [A-ZÀ-ÿ]+") %>%
      str_remove(", "),
    player_name = paste(first_name, last_name)
  ) %>%
  select(
    season, player_name, team,
    ast_total, ast_total_home, ast_total_away, ast_total_ratio
  ) %>%
  arrange(desc(ast_total_ratio))
players_table
# Team --------------------------------------------------------------------
# Team totals for each game: made field goals (m_fg), assists (ast) and
# fg_ast_pct, the assists-to-made-field-goals ratio.
# `season` and `home` are grouping keys instead of `unique()` calls inside
# summarise(): if a game_code/team group ever held more than one distinct
# value, summarise() would return several rows for that group (deprecated
# since dplyr 1.1.0), each carrying the sums of the whole group. When both
# are constant within a group the result is the same as before, including
# the column order.
ast_team <- assists_leb_oro %>%
  group_by(game_code, team, season, home) %>%
  summarise(
    m_fg = sum(m_fg),
    ast = sum(ast)
  ) %>%
  ungroup() %>%
  mutate(fg_ast_pct = ast / m_fg)
ast_team
# Mean fg_ast_pct per season and team, split into `home` and `away`
# columns, plus their difference d = home - away.
ast_diff <- ast_team %>%
  group_by(season, team, home) %>%
  summarise(fg_ast_pct = mean(fg_ast_pct)) %>%
  ungroup() %>%
  # Recode `home` to labels before widening (same approach as for
  # ast_player_dif) so the new columns are named directly, instead of
  # relying on backtick-quoted `FALSE` / `TRUE` column names.
  mutate(home = ifelse(home, "home", "away")) %>%
  pivot_wider(names_from = home, values_from = fg_ast_pct) %>%
  mutate(d = home - away)
# Season-level mean of the home-minus-away difference, scaled by 100
# (percentage points), drawn as points joined by a line.
# The y axis is fixed to 0-13; ggplot2 drops values outside scale limits
# (with a warning), so a season outside that range would not be shown.
ast_diff %>%
  group_by(season) %>%
  summarise(d = 100 * mean(d)) %>%
  ggplot(mapping = aes(x = season, y = d)) +
  geom_point() +
  geom_line() +
  ylim(0, 13)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment