solmos/elo-rating.R

## elo-rating.R
# Elo Rating System

library(eurolig)
library(tidyverse)
library(lubridate)

load("data/results.rds")

# Helpers -----------------------------------------------------------------

# Expected win probability before a game
getExpectedProb <- function(r_team, r_opp, home_adv, s) {
  1 / (1 + 10 ^ ((r_opp - r_team - home_adv) / s))
}

# Get Elo rating for next season
getCarryOver <- function(rating, c) {
  c * rating + 1505 * (1 - c)
}

# Get margin of victory multiplier
getMovMultiplier <- function(points_diff, elo_diff) {
  ((points_diff + 3) ^ 0.8) / (7.5 + 0.006 * elo_diff)
}

getEloSummary <- function(df) {
  df %>%
    pivot_longer(
      cols = ends_with("_new"),
      names_to = "type",
      values_to = "elo"
    ) %>%
    select(
      season,
      game_date,
      team_code_home,
      team_code_away,
      type,
      elo
    ) %>%
    mutate(
      team = ifelse(type == "elo_home_new", team_code_home, team_code_away),
      order = rank(game_date),
      team_id = paste0(team, "-", season)
    ) %>%
    select(season, game_date, team, elo, order, team_id)
}

# Algorithm ---------------------------------------------------------------


# For a single season
getSeasonElo <- function(df, k, home_adv, s, initial_elo) {

  team_ratings <- initial_elo

  # Data frame to store the subsequent values obtained by the algorithm
  ratings_df <- df %>%
    mutate(
      home_adv = NA,
      win_points_home = NA,
      win_points_away = NA,
      expected_prob_home = NA,
      expected_prob_away = NA,
      mov_home = NA,
      mov_away = NA,
      elo_home_prev = NA,
      elo_away_prev = NA,
      elo_home_new = NA,
      elo_away_new = NA,
      prob_pred = NA
    )

  for (i in 1:nrow(df)) {
    team_home <- df$team_code_home[i]
    team_away <- df$team_code_away[i]

    elo_home <- team_ratings[[team_home]]
    elo_away <- team_ratings[[team_away]]

    # Home advantage set to 0 for Final 4 games
    h <- ifelse(df$phase[i] == "FF", 0, home_adv)

    # Assign 1 for wins and 0 for losses
    win_points_home <- ifelse(
      df$points_home[i] > df$points_away[i],
      1,
      0
    )
    win_points_away <- ifelse(win_points_home == 0, 1, 0)

    # Find pre-game win probabilities
    expected_prob_home <- getExpectedProb(
      r_team = elo_home,
      r_opp = elo_away,
      home_adv = h,
      s = s)
    expected_prob_away <- 1 - expected_prob_home

    # Margin of victory multiplier
    points_diff_abs <- abs(df$points_home[i] - df$points_away[i])
    elo_diff_home <- elo_home + h - elo_away
    elo_diff_away <- elo_away - elo_home - h

    mov_home <- getMovMultiplier(points_diff_abs, elo_diff_home)
    mov_away <- getMovMultiplier(points_diff_abs, elo_diff_away)

    # Update Elo ratings
    elo_home_new <- elo_home + k * (win_points_home - expected_prob_home) * mov_home
    elo_away_new <- elo_away + k * (win_points_away - expected_prob_away) * mov_away

    team_ratings[[team_home]] <- elo_home_new
    team_ratings[[team_away]] <- elo_away_new

    # TODO: Add probabilistic prediction?
    prob_pred <- sample(
      x = c(team_home, team_away),
      size = 1,
      prob = c(expected_prob_home, expected_prob_away)
    )

    ratings_df$home_adv[i] <- h
    ratings_df$win_points_home[i] <- win_points_home
    ratings_df$win_points_away[i] <- win_points_away
    ratings_df$expected_prob_home[i] <- expected_prob_home
    ratings_df$expected_prob_away[i] <- expected_prob_away
    ratings_df$mov_home[i] <- mov_home
    ratings_df$mov_away[i] <- mov_away
    ratings_df$elo_home_prev[i] <- elo_home
    ratings_df$elo_away_prev[i] <- elo_away
    ratings_df$elo_home_new[i] <- elo_home_new
    ratings_df$elo_away_new[i] <- elo_away_new
    ratings_df$prob_pred[i] <- prob_pred
  }

  ratings_df <- ratings_df %>%
    mutate(
      winner = ifelse(points_home > points_away, team_code_home, team_code_away),
      winner_pred = ifelse(elo_home_prev + home_adv >= elo_away_prev,
                           team_code_home, team_code_away),
      correct_pred = ifelse(winner == winner_pred, TRUE, FALSE)
    )

  list(ratings_df = ratings_df, team_elo = team_ratings)
}

# Algorithm for several seasons
getElo <- function(df, k, home_adv, s, carry) {
  df <- arrange(df, game_date)
  season_results <- split(df, df$season)

  # Start with first season
  teams <- sort(unique(season_results[[1]]$team_code_home))
  # Since it is the first season overall, all teams start with 1300 Elo points
  initial_ratings <- as.list(rep(1300, length(teams)))
  names(initial_ratings) <- teams

  first_season_ratings <- getSeasonElo(
    season_results[[1]],
    k = k,
    home_adv = home_adv,
    s = s,
    initial_elo = initial_ratings
  )

  # TODO: Use the last recorded Elo rating, not last season
  elo_final <- tibble(
    season = unique(season_results[[1]]$season),
    team = names(first_season_ratings$team_elo),
    elo = unlist(first_season_ratings$team_elo)
  )

  season_ratings <- vector("list", length(season_results))
  season_ratings[[1]] <- first_season_ratings
  for (i in 2:length(season_ratings)) {
    teams <- sort(unique(season_results[[i]]$team_code_home))

    teams_new <- teams[!teams %in% elo_final$team]
    teams_new_elo <- as.list(rep(1300, length(teams_new)))
    names(teams_new_elo) <- teams_new

    teams_old <- teams[teams %in% elo_final$team]
    teams_old_elo <- vector("list", length(teams_old))
    names(teams_old_elo) <- teams_old
    for (j in seq_along(teams_old)) {
      elo_team <- elo_final %>%
        filter(team == teams_old[j])
      teams_old_elo[[j]] <- elo_team$elo[which.max(elo_team$season)]
    }

    teams_old_elo <- lapply(teams_old_elo, getCarryOver, c = carry)
    initial_elo <- c(teams_new_elo, teams_old_elo)

    season_ratings[[i]] <- getSeasonElo(
      season_results[[i]],
      k = k,
      home_adv = home_adv,
      s = s,
      initial_elo = initial_elo
    )

    elo_final_season <- tibble(
      season = unique(season_results[[i]]$season),
      team = names(season_ratings[[i]]$team_elo),
      elo = unlist(season_ratings[[i]]$team_elo)
    )

    elo_final <- bind_rows(elo_final, elo_final_season)
  }

  output_df <- map_df(season_ratings, function(x) x$ratings_df)

  output_df
}

# Tunning -----------------------------------------------------------------

# Grid optimization
k <- seq(10, 50, by = 5)
h <- seq(0, 150, by = 25)
c <- seq(0.5, 1, by = 0.1)

grid_df <- expand_grid(k, h, c)

checkAccuracy <- function(df, k, h, c) {
  df <- getElo(df, k, h, s = 400, c)
  sum(df$correct_pred) / nrow(df)
}

n <- nrow(grid_df)
accuracy <- numeric(n)

for (i in 1:n) {
  acc <- checkAccuracy(
    df = results,
    k = grid_df$k[i],
    h = grid_df$h[i],
    c = grid_df$c[i]
  )
  accuracy[i] <- acc
}

acc_df <- cbind(grid_df, accuracy) %>%
  as_tibble() %>%
  arrange(desc(accuracy))

# Ratings -----------------------------------------------------------------

elo_df <- getElo(results, k = 25, home_adv = 100, s = 400, carry = 0.8)

elo_summary <- getEloSummary(elo_df) %>%
  left_join(teaminfo, by = c("team" = "team_code", "season"))

elo_summary %>%
  ggplot(aes(order, elo, group = team_id)) +
  geom_line()
	# Elo Rating System

	library(eurolig)
	library(tidyverse)
	library(lubridate)

	load("data/results.rds")

	# Helpers -----------------------------------------------------------------

	# Expected win probability before a game
	getExpectedProb <- function(r_team, r_opp, home_adv, s) {
	1 / (1 + 10 ^ ((r_opp - r_team - home_adv) / s))
	}

	# Get Elo rating for next season
	getCarryOver <- function(rating, c) {
	c * rating + 1505 * (1 - c)
	}

	# Get margin of victory multiplier
	getMovMultiplier <- function(points_diff, elo_diff) {
	((points_diff + 3) ^ 0.8) / (7.5 + 0.006 * elo_diff)
	}

	getEloSummary <- function(df) {
	df %>%
	pivot_longer(
	cols = ends_with("_new"),
	names_to = "type",
	values_to = "elo"
	) %>%
	select(
	season,
	game_date,
	team_code_home,
	team_code_away,
	type,
	elo
	) %>%
	mutate(
	team = ifelse(type == "elo_home_new", team_code_home, team_code_away),
	order = rank(game_date),
	team_id = paste0(team, "-", season)
	) %>%
	select(season, game_date, team, elo, order, team_id)
	}

	# Algorithm ---------------------------------------------------------------


	# For a single season
	getSeasonElo <- function(df, k, home_adv, s, initial_elo) {

	team_ratings <- initial_elo

	# Data frame to store the subsequent values obtained by the algorithm
	ratings_df <- df %>%
	mutate(
	home_adv = NA,
	win_points_home = NA,
	win_points_away = NA,
	expected_prob_home = NA,
	expected_prob_away = NA,
	mov_home = NA,
	mov_away = NA,
	elo_home_prev = NA,
	elo_away_prev = NA,
	elo_home_new = NA,
	elo_away_new = NA,
	prob_pred = NA
	)

	for (i in 1:nrow(df)) {
	team_home <- df$team_code_home[i]
	team_away <- df$team_code_away[i]

	elo_home <- team_ratings[[team_home]]
	elo_away <- team_ratings[[team_away]]

	# Home advantage set to 0 for Final 4 games
	h <- ifelse(df$phase[i] == "FF", 0, home_adv)

	# Assign 1 for wins and 0 for losses
	win_points_home <- ifelse(
	df$points_home[i] > df$points_away[i],
	1,
	0
	)
	win_points_away <- ifelse(win_points_home == 0, 1, 0)

	# Find pre-game win probabilities
	expected_prob_home <- getExpectedProb(
	r_team = elo_home,
	r_opp = elo_away,
	home_adv = h,
	s = s)
	expected_prob_away <- 1 - expected_prob_home

	# Margin of victory multiplier
	points_diff_abs <- abs(df$points_home[i] - df$points_away[i])
	elo_diff_home <- elo_home + h - elo_away
	elo_diff_away <- elo_away - elo_home - h

	mov_home <- getMovMultiplier(points_diff_abs, elo_diff_home)
	mov_away <- getMovMultiplier(points_diff_abs, elo_diff_away)

	# Update Elo ratings
	elo_home_new <- elo_home + k * (win_points_home - expected_prob_home) * mov_home
	elo_away_new <- elo_away + k * (win_points_away - expected_prob_away) * mov_away

	team_ratings[[team_home]] <- elo_home_new
	team_ratings[[team_away]] <- elo_away_new

	# TODO: Add probabilistic prediction?
	prob_pred <- sample(
	x = c(team_home, team_away),
	size = 1,
	prob = c(expected_prob_home, expected_prob_away)
	)

	ratings_df$home_adv[i] <- h
	ratings_df$win_points_home[i] <- win_points_home
	ratings_df$win_points_away[i] <- win_points_away
	ratings_df$expected_prob_home[i] <- expected_prob_home
	ratings_df$expected_prob_away[i] <- expected_prob_away
	ratings_df$mov_home[i] <- mov_home
	ratings_df$mov_away[i] <- mov_away
	ratings_df$elo_home_prev[i] <- elo_home
	ratings_df$elo_away_prev[i] <- elo_away
	ratings_df$elo_home_new[i] <- elo_home_new
	ratings_df$elo_away_new[i] <- elo_away_new
	ratings_df$prob_pred[i] <- prob_pred
	}

	ratings_df <- ratings_df %>%
	mutate(
	winner = ifelse(points_home > points_away, team_code_home, team_code_away),
	winner_pred = ifelse(elo_home_prev + home_adv >= elo_away_prev,
	team_code_home, team_code_away),
	correct_pred = ifelse(winner == winner_pred, TRUE, FALSE)
	)

	list(ratings_df = ratings_df, team_elo = team_ratings)
	}

	# Algorithm for several seasons
	getElo <- function(df, k, home_adv, s, carry) {
	df <- arrange(df, game_date)
	season_results <- split(df, df$season)

	# Start with first season
	teams <- sort(unique(season_results[[1]]$team_code_home))
	# Since it is the first season overall, all teams start with 1300 Elo points
	initial_ratings <- as.list(rep(1300, length(teams)))
	names(initial_ratings) <- teams

	first_season_ratings <- getSeasonElo(
	season_results[[1]],
	k = k,
	home_adv = home_adv,
	s = s,
	initial_elo = initial_ratings
	)

	# TODO: Use the last recorded Elo rating, not last season
	elo_final <- tibble(
	season = unique(season_results[[1]]$season),
	team = names(first_season_ratings$team_elo),
	elo = unlist(first_season_ratings$team_elo)
	)

	season_ratings <- vector("list", length(season_results))
	season_ratings[[1]] <- first_season_ratings
	for (i in 2:length(season_ratings)) {
	teams <- sort(unique(season_results[[i]]$team_code_home))

	teams_new <- teams[!teams %in% elo_final$team]
	teams_new_elo <- as.list(rep(1300, length(teams_new)))
	names(teams_new_elo) <- teams_new

	teams_old <- teams[teams %in% elo_final$team]
	teams_old_elo <- vector("list", length(teams_old))
	names(teams_old_elo) <- teams_old
	for (j in seq_along(teams_old)) {
	elo_team <- elo_final %>%
	filter(team == teams_old[j])
	teams_old_elo[[j]] <- elo_team$elo[which.max(elo_team$season)]
	}

	teams_old_elo <- lapply(teams_old_elo, getCarryOver, c = carry)
	initial_elo <- c(teams_new_elo, teams_old_elo)

	season_ratings[[i]] <- getSeasonElo(
	season_results[[i]],
	k = k,
	home_adv = home_adv,
	s = s,
	initial_elo = initial_elo
	)

	elo_final_season <- tibble(
	season = unique(season_results[[i]]$season),
	team = names(season_ratings[[i]]$team_elo),
	elo = unlist(season_ratings[[i]]$team_elo)
	)

	elo_final <- bind_rows(elo_final, elo_final_season)
	}

	output_df <- map_df(season_ratings, function(x) x$ratings_df)

	output_df
	}

	# Tunning -----------------------------------------------------------------

	# Grid optimization
	k <- seq(10, 50, by = 5)
	h <- seq(0, 150, by = 25)
	c <- seq(0.5, 1, by = 0.1)

	grid_df <- expand_grid(k, h, c)

	checkAccuracy <- function(df, k, h, c) {
	df <- getElo(df, k, h, s = 400, c)
	sum(df$correct_pred) / nrow(df)
	}

	n <- nrow(grid_df)
	accuracy <- numeric(n)

	for (i in 1:n) {
	acc <- checkAccuracy(
	df = results,
	k = grid_df$k[i],
	h = grid_df$h[i],
	c = grid_df$c[i]
	)
	accuracy[i] <- acc
	}

	acc_df <- cbind(grid_df, accuracy) %>%
	as_tibble() %>%
	arrange(desc(accuracy))

	# Ratings -----------------------------------------------------------------

	elo_df <- getElo(results, k = 25, home_adv = 100, s = 400, carry = 0.8)

	elo_summary <- getEloSummary(elo_df) %>%
	left_join(teaminfo, by = c("team" = "team_code", "season"))

	elo_summary %>%
	ggplot(aes(order, elo, group = team_id)) +
	geom_line()