Last active
March 3, 2024 18:00
-
-
Save boooeee/ed393cdc93723fab517bb6d596d48a47 to your computer and use it in GitHub Desktop.
This code derives a schedule/home field adjusted team ranking for a randomly generated set of games. The randomly generated games can be replaced with actual teams and outcomes from the sport of our choice.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
n_games <- 100 | |
# Define the teams | |
teams <- c("rock", "paper", "scissors") | |
# simulated home advantage # | |
home_adv<-2 | |
# Generate the data frame | |
#set.seed(123) # uncomment for reproducibility | |
games_df <- tibble( | |
home_team = sample(teams, n_games, replace = TRUE), | |
away_team = sample(teams, n_games, replace = TRUE), | |
home_score = sample(0:10, n_games, replace = TRUE) + sample(0:home_adv*2, n_games, replace = TRUE), | |
away_score = sample(0:10, n_games, replace = TRUE) | |
) | |
# Ensure home_team and away_team are not the same | |
games_df <- games_df %>% | |
filter(home_team != away_team) | |
games_df <- games_df %>% | |
# create margin and total fields # | |
mutate(margin=home_score-away_score,total=home_score+away_score) %>% | |
# convert team variables to factors # | |
mutate(home_team=factor(home_team,levels=teams),away_team=factor(away_team,levels=teams)) | |
# summarize "season" - for info only # | |
season_home<-games_df %>% | |
group_by(home_team) %>% | |
rename(team=home_team) %>% | |
summarise(home_games=n(),avg_home_margin=mean(margin)) | |
season_away<-games_df %>% | |
group_by(away_team) %>% | |
rename(team=away_team) %>% | |
summarise(away_games=n(),avg_away_margin=mean(-margin)) | |
season<-season_home %>% | |
full_join(season_away,by="team") | |
# create model matrices # | |
home_mm<-model.matrix(~games_df$home_team-1) | |
away_mm<-model.matrix(~games_df$away_team-1) | |
colnames(home_mm)<-colnames(away_mm)<-teams | |
# create model matrix for regression # | |
mm<-home_mm-away_mm | |
# create independent variable # | |
y<-games_df$margin | |
# run regression # | |
rate_rps<-lm(y ~ mm) | |
# extract coefficients to create ratings and derived home field advantage # | |
coeff<-rate_rps$coefficients | |
# derived home field advantage # | |
home_adv_derived<-coeff[1] | |
# ratings # | |
ratings_rps<-coeff[2:length(coeff)] | |
# replace NA values with zero # | |
# NOTE - this step is needed because the regression is unconstrained - there is probably a better way to do this, but this has always worked for me # | |
ratings_rps[is.na(ratings_rps)]<-0 | |
# create rating tibble # | |
ratings_rps <- tibble( | |
team=names(ratings_rps), | |
rating=ratings_rps | |
) | |
# renormalize rating to average to zero and clean team names # | |
ratings_rps<-ratings_rps %>% | |
mutate(team=str_replace(team,"mm","")) %>% | |
mutate(rating=rating-mean(rating)) %>% | |
arrange(desc(rating)) %>% | |
mutate(rank=rank(-rating)) %>% | |
relocate(rank) | |
print(home_adv_derived) | |
print(ratings_rps) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment