Skip to content

Instantly share code, notes, and snippets.

@solmos
Last active March 11, 2020 17:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save solmos/f92a174aa7a005c25c2f5db31988efe4 to your computer and use it in GitHub Desktop.
Save solmos/f92a174aa7a005c25c2f5db31988efe4 to your computer and use it in GitHub Desktop.
# Elo Rating System
library(eurolig)
library(tidyverse)
library(lubridate)
load("data/results.rds")
# Helpers -----------------------------------------------------------------
# Expected win probability before a game
getExpectedProb <- function(r_team, r_opp, home_adv, s) {
1 / (1 + 10 ^ ((r_opp - r_team - home_adv) / s))
}
# Get Elo rating for next season
getCarryOver <- function(rating, c) {
c * rating + 1505 * (1 - c)
}
# Get margin of victory multiplier
getMovMultiplier <- function(points_diff, elo_diff) {
((points_diff + 3) ^ 0.8) / (7.5 + 0.006 * elo_diff)
}
getEloSummary <- function(df) {
df %>%
pivot_longer(
cols = ends_with("_new"),
names_to = "type",
values_to = "elo"
) %>%
select(
season,
game_date,
team_code_home,
team_code_away,
type,
elo
) %>%
mutate(
team = ifelse(type == "elo_home_new", team_code_home, team_code_away),
order = rank(game_date),
team_id = paste0(team, "-", season)
) %>%
select(season, game_date, team, elo, order, team_id)
}
# Algorithm ---------------------------------------------------------------
# For a single season
getSeasonElo <- function(df, k, home_adv, s, initial_elo) {
team_ratings <- initial_elo
# Data frame to store the subsequent values obtained by the algorithm
ratings_df <- df %>%
mutate(
home_adv = NA,
win_points_home = NA,
win_points_away = NA,
expected_prob_home = NA,
expected_prob_away = NA,
mov_home = NA,
mov_away = NA,
elo_home_prev = NA,
elo_away_prev = NA,
elo_home_new = NA,
elo_away_new = NA,
prob_pred = NA
)
for (i in 1:nrow(df)) {
team_home <- df$team_code_home[i]
team_away <- df$team_code_away[i]
elo_home <- team_ratings[[team_home]]
elo_away <- team_ratings[[team_away]]
# Home advantage set to 0 for Final 4 games
h <- ifelse(df$phase[i] == "FF", 0, home_adv)
# Assign 1 for wins and 0 for losses
win_points_home <- ifelse(
df$points_home[i] > df$points_away[i],
1,
0
)
win_points_away <- ifelse(win_points_home == 0, 1, 0)
# Find pre-game win probabilities
expected_prob_home <- getExpectedProb(
r_team = elo_home,
r_opp = elo_away,
home_adv = h,
s = s)
expected_prob_away <- 1 - expected_prob_home
# Margin of victory multiplier
points_diff_abs <- abs(df$points_home[i] - df$points_away[i])
elo_diff_home <- elo_home + h - elo_away
elo_diff_away <- elo_away - elo_home - h
mov_home <- getMovMultiplier(points_diff_abs, elo_diff_home)
mov_away <- getMovMultiplier(points_diff_abs, elo_diff_away)
# Update Elo ratings
elo_home_new <- elo_home + k * (win_points_home - expected_prob_home) * mov_home
elo_away_new <- elo_away + k * (win_points_away - expected_prob_away) * mov_away
team_ratings[[team_home]] <- elo_home_new
team_ratings[[team_away]] <- elo_away_new
# TODO: Add probabilistic prediction?
prob_pred <- sample(
x = c(team_home, team_away),
size = 1,
prob = c(expected_prob_home, expected_prob_away)
)
ratings_df$home_adv[i] <- h
ratings_df$win_points_home[i] <- win_points_home
ratings_df$win_points_away[i] <- win_points_away
ratings_df$expected_prob_home[i] <- expected_prob_home
ratings_df$expected_prob_away[i] <- expected_prob_away
ratings_df$mov_home[i] <- mov_home
ratings_df$mov_away[i] <- mov_away
ratings_df$elo_home_prev[i] <- elo_home
ratings_df$elo_away_prev[i] <- elo_away
ratings_df$elo_home_new[i] <- elo_home_new
ratings_df$elo_away_new[i] <- elo_away_new
ratings_df$prob_pred[i] <- prob_pred
}
ratings_df <- ratings_df %>%
mutate(
winner = ifelse(points_home > points_away, team_code_home, team_code_away),
winner_pred = ifelse(elo_home_prev + home_adv >= elo_away_prev,
team_code_home, team_code_away),
correct_pred = ifelse(winner == winner_pred, TRUE, FALSE)
)
list(ratings_df = ratings_df, team_elo = team_ratings)
}
# Algorithm for several seasons
getElo <- function(df, k, home_adv, s, carry) {
df <- arrange(df, game_date)
season_results <- split(df, df$season)
# Start with first season
teams <- sort(unique(season_results[[1]]$team_code_home))
# Since it is the first season overall, all teams start with 1300 Elo points
initial_ratings <- as.list(rep(1300, length(teams)))
names(initial_ratings) <- teams
first_season_ratings <- getSeasonElo(
season_results[[1]],
k = k,
home_adv = home_adv,
s = s,
initial_elo = initial_ratings
)
# TODO: Use the last recorded Elo rating, not last season
elo_final <- tibble(
season = unique(season_results[[1]]$season),
team = names(first_season_ratings$team_elo),
elo = unlist(first_season_ratings$team_elo)
)
season_ratings <- vector("list", length(season_results))
season_ratings[[1]] <- first_season_ratings
for (i in 2:length(season_ratings)) {
teams <- sort(unique(season_results[[i]]$team_code_home))
teams_new <- teams[!teams %in% elo_final$team]
teams_new_elo <- as.list(rep(1300, length(teams_new)))
names(teams_new_elo) <- teams_new
teams_old <- teams[teams %in% elo_final$team]
teams_old_elo <- vector("list", length(teams_old))
names(teams_old_elo) <- teams_old
for (j in seq_along(teams_old)) {
elo_team <- elo_final %>%
filter(team == teams_old[j])
teams_old_elo[[j]] <- elo_team$elo[which.max(elo_team$season)]
}
teams_old_elo <- lapply(teams_old_elo, getCarryOver, c = carry)
initial_elo <- c(teams_new_elo, teams_old_elo)
season_ratings[[i]] <- getSeasonElo(
season_results[[i]],
k = k,
home_adv = home_adv,
s = s,
initial_elo = initial_elo
)
elo_final_season <- tibble(
season = unique(season_results[[i]]$season),
team = names(season_ratings[[i]]$team_elo),
elo = unlist(season_ratings[[i]]$team_elo)
)
elo_final <- bind_rows(elo_final, elo_final_season)
}
output_df <- map_df(season_ratings, function(x) x$ratings_df)
output_df
}
# Tunning -----------------------------------------------------------------
# Grid optimization
k <- seq(10, 50, by = 5)
h <- seq(0, 150, by = 25)
c <- seq(0.5, 1, by = 0.1)
grid_df <- expand_grid(k, h, c)
checkAccuracy <- function(df, k, h, c) {
df <- getElo(df, k, h, s = 400, c)
sum(df$correct_pred) / nrow(df)
}
n <- nrow(grid_df)
accuracy <- numeric(n)
for (i in 1:n) {
acc <- checkAccuracy(
df = results,
k = grid_df$k[i],
h = grid_df$h[i],
c = grid_df$c[i]
)
accuracy[i] <- acc
}
acc_df <- cbind(grid_df, accuracy) %>%
as_tibble() %>%
arrange(desc(accuracy))
# Ratings -----------------------------------------------------------------
elo_df <- getElo(results, k = 25, home_adv = 100, s = 400, carry = 0.8)
elo_summary <- getEloSummary(elo_df) %>%
left_join(teaminfo, by = c("team" = "team_code", "season"))
elo_summary %>%
ggplot(aes(order, elo, group = team_id)) +
geom_line()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment