Last active
March 11, 2020 17:26
-
-
Save solmos/f92a174aa7a005c25c2f5db31988efe4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Elo Rating System | |
library(eurolig) | |
library(tidyverse) | |
library(lubridate) | |
load("data/results.rds") | |
# Helpers ----------------------------------------------------------------- | |
# Expected win probability before a game | |
getExpectedProb <- function(r_team, r_opp, home_adv, s) { | |
1 / (1 + 10 ^ ((r_opp - r_team - home_adv) / s)) | |
} | |
# Get Elo rating for next season | |
getCarryOver <- function(rating, c) { | |
c * rating + 1505 * (1 - c) | |
} | |
# Get margin of victory multiplier | |
getMovMultiplier <- function(points_diff, elo_diff) { | |
((points_diff + 3) ^ 0.8) / (7.5 + 0.006 * elo_diff) | |
} | |
getEloSummary <- function(df) { | |
df %>% | |
pivot_longer( | |
cols = ends_with("_new"), | |
names_to = "type", | |
values_to = "elo" | |
) %>% | |
select( | |
season, | |
game_date, | |
team_code_home, | |
team_code_away, | |
type, | |
elo | |
) %>% | |
mutate( | |
team = ifelse(type == "elo_home_new", team_code_home, team_code_away), | |
order = rank(game_date), | |
team_id = paste0(team, "-", season) | |
) %>% | |
select(season, game_date, team, elo, order, team_id) | |
} | |
# Algorithm --------------------------------------------------------------- | |
# For a single season | |
getSeasonElo <- function(df, k, home_adv, s, initial_elo) { | |
team_ratings <- initial_elo | |
# Data frame to store the subsequent values obtained by the algorithm | |
ratings_df <- df %>% | |
mutate( | |
home_adv = NA, | |
win_points_home = NA, | |
win_points_away = NA, | |
expected_prob_home = NA, | |
expected_prob_away = NA, | |
mov_home = NA, | |
mov_away = NA, | |
elo_home_prev = NA, | |
elo_away_prev = NA, | |
elo_home_new = NA, | |
elo_away_new = NA, | |
prob_pred = NA | |
) | |
for (i in 1:nrow(df)) { | |
team_home <- df$team_code_home[i] | |
team_away <- df$team_code_away[i] | |
elo_home <- team_ratings[[team_home]] | |
elo_away <- team_ratings[[team_away]] | |
# Home advantage set to 0 for Final 4 games | |
h <- ifelse(df$phase[i] == "FF", 0, home_adv) | |
# Assign 1 for wins and 0 for losses | |
win_points_home <- ifelse( | |
df$points_home[i] > df$points_away[i], | |
1, | |
0 | |
) | |
win_points_away <- ifelse(win_points_home == 0, 1, 0) | |
# Find pre-game win probabilities | |
expected_prob_home <- getExpectedProb( | |
r_team = elo_home, | |
r_opp = elo_away, | |
home_adv = h, | |
s = s) | |
expected_prob_away <- 1 - expected_prob_home | |
# Margin of victory multiplier | |
points_diff_abs <- abs(df$points_home[i] - df$points_away[i]) | |
elo_diff_home <- elo_home + h - elo_away | |
elo_diff_away <- elo_away - elo_home - h | |
mov_home <- getMovMultiplier(points_diff_abs, elo_diff_home) | |
mov_away <- getMovMultiplier(points_diff_abs, elo_diff_away) | |
# Update Elo ratings | |
elo_home_new <- elo_home + k * (win_points_home - expected_prob_home) * mov_home | |
elo_away_new <- elo_away + k * (win_points_away - expected_prob_away) * mov_away | |
team_ratings[[team_home]] <- elo_home_new | |
team_ratings[[team_away]] <- elo_away_new | |
# TODO: Add probabilistic prediction? | |
prob_pred <- sample( | |
x = c(team_home, team_away), | |
size = 1, | |
prob = c(expected_prob_home, expected_prob_away) | |
) | |
ratings_df$home_adv[i] <- h | |
ratings_df$win_points_home[i] <- win_points_home | |
ratings_df$win_points_away[i] <- win_points_away | |
ratings_df$expected_prob_home[i] <- expected_prob_home | |
ratings_df$expected_prob_away[i] <- expected_prob_away | |
ratings_df$mov_home[i] <- mov_home | |
ratings_df$mov_away[i] <- mov_away | |
ratings_df$elo_home_prev[i] <- elo_home | |
ratings_df$elo_away_prev[i] <- elo_away | |
ratings_df$elo_home_new[i] <- elo_home_new | |
ratings_df$elo_away_new[i] <- elo_away_new | |
ratings_df$prob_pred[i] <- prob_pred | |
} | |
ratings_df <- ratings_df %>% | |
mutate( | |
winner = ifelse(points_home > points_away, team_code_home, team_code_away), | |
winner_pred = ifelse(elo_home_prev + home_adv >= elo_away_prev, | |
team_code_home, team_code_away), | |
correct_pred = ifelse(winner == winner_pred, TRUE, FALSE) | |
) | |
list(ratings_df = ratings_df, team_elo = team_ratings) | |
} | |
# Algorithm for several seasons | |
getElo <- function(df, k, home_adv, s, carry) { | |
df <- arrange(df, game_date) | |
season_results <- split(df, df$season) | |
# Start with first season | |
teams <- sort(unique(season_results[[1]]$team_code_home)) | |
# Since it is the first season overall, all teams start with 1300 Elo points | |
initial_ratings <- as.list(rep(1300, length(teams))) | |
names(initial_ratings) <- teams | |
first_season_ratings <- getSeasonElo( | |
season_results[[1]], | |
k = k, | |
home_adv = home_adv, | |
s = s, | |
initial_elo = initial_ratings | |
) | |
# TODO: Use the last recorded Elo rating, not last season | |
elo_final <- tibble( | |
season = unique(season_results[[1]]$season), | |
team = names(first_season_ratings$team_elo), | |
elo = unlist(first_season_ratings$team_elo) | |
) | |
season_ratings <- vector("list", length(season_results)) | |
season_ratings[[1]] <- first_season_ratings | |
for (i in 2:length(season_ratings)) { | |
teams <- sort(unique(season_results[[i]]$team_code_home)) | |
teams_new <- teams[!teams %in% elo_final$team] | |
teams_new_elo <- as.list(rep(1300, length(teams_new))) | |
names(teams_new_elo) <- teams_new | |
teams_old <- teams[teams %in% elo_final$team] | |
teams_old_elo <- vector("list", length(teams_old)) | |
names(teams_old_elo) <- teams_old | |
for (j in seq_along(teams_old)) { | |
elo_team <- elo_final %>% | |
filter(team == teams_old[j]) | |
teams_old_elo[[j]] <- elo_team$elo[which.max(elo_team$season)] | |
} | |
teams_old_elo <- lapply(teams_old_elo, getCarryOver, c = carry) | |
initial_elo <- c(teams_new_elo, teams_old_elo) | |
season_ratings[[i]] <- getSeasonElo( | |
season_results[[i]], | |
k = k, | |
home_adv = home_adv, | |
s = s, | |
initial_elo = initial_elo | |
) | |
elo_final_season <- tibble( | |
season = unique(season_results[[i]]$season), | |
team = names(season_ratings[[i]]$team_elo), | |
elo = unlist(season_ratings[[i]]$team_elo) | |
) | |
elo_final <- bind_rows(elo_final, elo_final_season) | |
} | |
output_df <- map_df(season_ratings, function(x) x$ratings_df) | |
output_df | |
} | |
# Tunning ----------------------------------------------------------------- | |
# Grid optimization | |
k <- seq(10, 50, by = 5) | |
h <- seq(0, 150, by = 25) | |
c <- seq(0.5, 1, by = 0.1) | |
grid_df <- expand_grid(k, h, c) | |
checkAccuracy <- function(df, k, h, c) { | |
df <- getElo(df, k, h, s = 400, c) | |
sum(df$correct_pred) / nrow(df) | |
} | |
n <- nrow(grid_df) | |
accuracy <- numeric(n) | |
for (i in 1:n) { | |
acc <- checkAccuracy( | |
df = results, | |
k = grid_df$k[i], | |
h = grid_df$h[i], | |
c = grid_df$c[i] | |
) | |
accuracy[i] <- acc | |
} | |
acc_df <- cbind(grid_df, accuracy) %>% | |
as_tibble() %>% | |
arrange(desc(accuracy)) | |
# Ratings ----------------------------------------------------------------- | |
elo_df <- getElo(results, k = 25, home_adv = 100, s = 400, carry = 0.8) | |
elo_summary <- getEloSummary(elo_df) %>% | |
left_join(teaminfo, by = c("team" = "team_code", "season")) | |
elo_summary %>% | |
ggplot(aes(order, elo, group = team_id)) + | |
geom_line() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment