Skip to content

Instantly share code, notes, and snippets.

@cjtdevil
Created March 5, 2022 04:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cjtdevil/778e74329cc278d4f03f1c3a4c9761a7 to your computer and use it in GitHub Desktop.
Save cjtdevil/778e74329cc278d4f03f1c3a4c9761a7 to your computer and use it in GitHub Desktop.
library(tidyverse);library(readr);library(modelr);library(ggplot2)
# Import Players
sdf = read_csv("~EH_rapm_sk_stats_ev_regular_2022-03-02.csv") %>% #Downloaded from Evolving-Hockey.com
mutate(dGA = `xGA/60`-`GA/60`,
Position=ifelse(Position=="D","D","F")) %>%
select(Player:TOI,dGA) %>%
mutate(Season = as.numeric(substr(Season,4,5))+2000)
# Creating Lagged Seasons
pseason = list()
for(i in 1:3){
pseason[[i]] = sdf %>%
group_by(Player,Season) %>%
summarise(dGA = weighted.mean(dGA,TOI)) %>%
mutate(Season=Season+i) %>%
select(Player,Season,dGA) %>%
rename_at(vars(dGA),~paste0('l',i))
}
# Combining lagged seasons
sdf_time = sdf %>%
left_join(pseason[[1]],by=c('Player','Season')) %>%
left_join(pseason[[2]],by=c('Player','Season')) %>%
left_join(pseason[[3]],by=c('Player','Season')) %>%
mutate_all(~ifelse(is.na(.),0,.))
# Marcel-Projection for Players
pred_skater = lm(data=sdf_time,dGA~l1+l2+l3,weights = TOI)
summary(pred_skater)
sdf_players = sdf_time %>% ungroup %>%
add_predictions(pred_skater,'pred_dGA')
# Visuals for Skater Marcel Projection
ggplot(data=sdf_players%>%filter(TOI>500),
aes(x=pred_dGA,y=dGA))+
geom_point(alpha=0.7)+
geom_smooth(method='lm')+
xlab("Projected dGA/60")+
ylab("Actual dGA/60")+
ggtitle("Predictivity of Preseason Marcel-Projected RAPM_dGA/60")+
theme(plot.title = element_text(hjust=0.5))
ggplot(data=sdf_players%>%filter(TOI>500),
aes(pred_dGA,fill=Position))+
geom_density(alpha=0.7)+
xlab("Projected dGA/60")+
ylab("# of Players")+
ggtitle("Distribution of Impact on xGA-GA Differential (dGA/60)")+
theme(plot.title = element_text(hjust=0.5))
# Team Totals
sdf_teams = sdf_players %>%
ungroup %>% group_by(Team,Season) %>%
summarise(`skater-based_proj` = 5*weighted.mean(pred_dGA,TOI))
ggplot(data=sdf_teams,
aes(`skater-based_proj`))+
geom_histogram()+
xlab("Projected Team dGA/60")+
ylab("# of Team-Seasons")+
ggtitle("Distribution Cumulative Skater Impact on dGA/60 for Teams")+
theme(plot.title = element_text(hjust=0.5))
# Goalie-based work
gdf = read_csv("~EH_std_gl_stats_ev_regular_adj_2022-03-02.csv") %>% #Downloaded from Evolving-Hockey.com
select(Player:TOI,dSv = `dFSv%`) %>%
mutate(Season = as.numeric(substr(Season,4,5))+2000)
# Goalie lags
gseason = list()
for(i in 1:3){
gseason[[i]] = gdf %>%
group_by(Player,Season) %>%
summarise(dSv = weighted.mean(dSv,TOI)) %>%
mutate(Season=Season+i) %>%
select(Player,Season,dSv) %>%
rename_at(vars(dSv),~paste0('l',i))
}
# Combining goalie lags
gdf_time = gdf %>%
left_join(gseason[[1]],by=c('Player','Season')) %>%
left_join(gseason[[2]],by=c('Player','Season')) %>%
left_join(gseason[[3]],by=c('Player','Season')) %>%
mutate_all(~ifelse(is.na(.),-1,.)) %>%
left_join(sdf_teams,c('Season','Team'))
# Make goalie Marcel projection
pred_goalie = lm(data=gdf_time,dSv~l1+l2+l3,weights = TOI)
summary(pred_goalie)
gdf_time = gdf_time %>% add_predictions(pred_goalie,'goalie-based_proj')
# Compare goalie and skater projections
pred_dSv = lm(data=gdf_time,dSv~`goalie-based_proj`+`skater-based_proj`+0,weights = TOI)
summary(pred_dSv)
# Add adjustment to goalies
gdf_players = gdf_time %>% ungroup %>%
mutate(pred = `skater-based_proj`*pred_dSv$coefficients[2],
New_dSv = dSv-pred) %>% filter(TOI>500) %>%
arrange(-New_dSv) %>%
select(Player,Season,Team,dSv,xdSv=pred,ddSv=New_dSv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment