Created
December 10, 2020 15:13
-
-
Save tejseth/4653275d30e9cf50e9ffcacd07010858 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(nflfastR) | |
library(ggplot2) | |
library(dplyr) | |
library(hrbrthemes) | |
library(ggrepel) | |
library(ggimage) | |
games <- readRDS(url("http://www.habitatring.com/games.rds")) | |
games2 <- games %>% | |
filter(season > 2009) | |
seasons <- 2010:2020 | |
pbp <- map_df(seasons, function(x) { | |
readRDS( | |
url( | |
paste0("https://raw.githubusercontent.com/guga31bb/nflfastR-data/master/data/play_by_play_",x,".rds") | |
) | |
) %>% | |
filter(rush == 1 | pass == 1, week <= 17, !is.na(epa), !is.na(posteam), posteam != "") | |
}) | |
results <- games2 %>% | |
filter(game_type == 'REG') %>% | |
select(game_id, season, week, home_team, home_score, away_team, away_score) | |
offense <- pbp %>% | |
group_by(game_id, posteam, season, week) %>% | |
summarize(plays = n(), | |
off_epa_play = mean(epa), | |
off_total_epa = sum(epa), | |
off_success_rate = mean(success), | |
explosive_play_rate = sum(epa>0.75) / plays, | |
bad_play_rate = sum(epa < -0.6)/ plays, | |
avg_wpa = mean(wpa, na.rm=T), | |
series_success = mean(series_success), | |
cpoe = mean(cpoe, na.rm=T), | |
avg_yardline = mean(100 - (yardline_100))) | |
results <- results %>% | |
mutate( | |
home_team = case_when( | |
home_team == 'OAK' ~ 'LV', | |
home_team == 'SD' ~ 'LAC', | |
home_team == 'STL' ~ 'LA', | |
TRUE ~ home_team | |
) | |
) | |
results <- results %>% | |
mutate( | |
away_team = case_when( | |
away_team == 'OAK' ~ 'LV', | |
away_team == 'SD' ~ 'LAC', | |
away_team == 'STL' ~ 'LA', | |
TRUE ~ away_team | |
) | |
) | |
defense <- pbp %>% | |
group_by(game_id, defteam, season, week) %>% | |
summarize(plays = n(), | |
def_good_play_rate = (sum(epa < -0.6)/plays)) | |
home_results <- results %>% | |
select(-away_team, -away_score) | |
home_results <- home_results %>% | |
left_join(offense, by = c('game_id', 'season', 'week', 'home_team' = 'posteam')) | |
home_results <- home_results %>% | |
left_join(defense, by = c('game_id', 'season', 'week', 'home_team' = 'defteam')) | |
home_results %>% | |
select(-game_id, -season, -week, -home_team) %>% | |
cor(use="complete.obs") %>% | |
round(2) | |
names(home_results)[names(home_results) == 'plays'] <- 'home_plays' | |
names(home_results)[names(home_results) == 'off_epa_play'] <- 'home_off_epa_play' | |
names(home_results)[names(home_results) == 'off_total_epa'] <- 'home_off_total_epa' | |
names(home_results)[names(home_results) == 'off_success_rate'] <- 'home_off_success_rate' | |
names(home_results)[names(home_results) == 'off_sd_epa'] <- 'home_off_sd_epa' | |
names(home_results)[names(home_results) == 'explosive_play_rate'] <- 'home_explosive_play_rate' | |
names(home_results)[names(home_results) == 'bad_play_rate'] <- 'home_bad_play_rate' | |
names(home_results)[names(home_results) == 'def_good_play_rate.x'] <- 'home_def_good_play_rate' | |
away_results <- results %>% | |
select(-home_team, -home_score) | |
away_results <- away_results %>% | |
left_join(offense, by = c('game_id', 'season', 'week', 'away_team' = 'posteam')) | |
home_results <- home_results %>% | |
left_join(defense, by = c('game_id', 'season', 'week', 'home_team' = 'defteam')) | |
away_results %>% | |
select(-game_id, -season, -week, -away_team) %>% | |
cor(use="complete.obs") %>% | |
round(2) | |
names(away_results)[names(away_results) == 'plays'] <- 'away_plays' | |
names(away_results)[names(away_results) == 'off_epa_play'] <- 'away_off_epa_play' | |
names(away_results)[names(away_results) == 'off_total_epa'] <- 'away_off_total_epa' | |
names(away_results)[names(away_results) == 'off_success_rate'] <- 'away_off_success_rate' | |
names(away_results)[names(away_results) == 'off_sd_epa'] <- 'away_off_sd_epa' | |
names(away_results)[names(away_results) == 'explosive_play_rate'] <- 'away_explosive_play_rate' | |
names(away_results)[names(away_results) == 'bad_play_rate'] <- 'away_bad_play_rate' | |
home_away_results <- merge(home_results, away_results, by = c('game_id', 'season', 'week')) | |
home_fit <- lm(home_score ~ | |
home_off_epa_play + home_off_total_epa + home_explosive_play_rate + home_bad_play_rate, data = home_results) | |
summary(home_fit) | |
home_preds <- predict(home_fit, home_results) %>% | |
as_tibble() %>% | |
rename(home_prediction = value) %>% | |
round(1) %>% | |
bind_cols( | |
home_results) %>% | |
select(game_id, season, week, home_team, home_prediction, home_score, home_off_epa_play) %>% | |
mutate(prediction_minus_actual = home_prediction - home_score) | |
away_fit <- lm(away_score ~ | |
away_off_epa_play + away_off_total_epa + away_explosive_play_rate + away_bad_play_rate, data = away_results) | |
summary(away_fit) | |
away_preds <- predict(away_fit, away_results) %>% | |
as_tibble() %>% | |
rename(away_prediction = value) %>% | |
round(1) %>% | |
bind_cols( | |
away_results) %>% | |
select(game_id, season, week, away_team, away_prediction, away_score, away_off_epa_play) %>% | |
mutate(prediction_minus_actual = away_prediction - away_score) | |
home_away_preds <- merge(home_preds, away_preds, by = c("game_id", "season", "week")) | |
predictions_2020 <- home_away_preds %>% | |
filter(season == 2020) %>% | |
select(-home_off_epa_play, -away_off_epa_play) %>% | |
filter(!is.na(home_score)) | |
names(predictions_2020)[names(predictions_2020) == 'prediction_minus_actual.x'] <- 'home_pred_minus_actual' | |
names(predictions_2020)[names(predictions_2020) == 'prediction_minus_actual.y'] <- 'away_pred_minus_actual' | |
home_pred_stats <- predictions_2020 %>% | |
group_by(home_team) %>% | |
summarize(home_games = n(), | |
total_home_pred = sum(home_prediction), | |
total_home_score = sum(home_score), | |
total_home_pred_minus_actual = sum(home_pred_minus_actual)) | |
away_pred_stats <- predictions_2020 %>% | |
group_by(away_team) %>% | |
summarize(away_games = n(), | |
total_away_pred = sum(away_prediction), | |
total_away_score = sum(away_score), | |
total_away_pred_minus_actual = sum(away_pred_minus_actual)) | |
home_defense_stats <- predictions_2020 %>% | |
group_by(home_team) %>% | |
summarize(away_games = n(), | |
total_away_pred = sum(away_prediction), | |
total_away_score = sum(away_score)) | |
names(home_defense_stats)[names(home_defense_stats) == 'total_away_pred'] <- 'defense_pred' | |
names(home_defense_stats)[names(home_defense_stats) == 'total_away_actual'] <- 'defense_actual' | |
away_defense_stats <- predictions_2020 %>% | |
group_by(away_team) %>% | |
summarize(home_games = n(), | |
total_home_pred = sum(home_prediction), | |
total_home_score = sum(home_score)) | |
names(away_defense_stats)[names(away_defense_stats) == 'total_home_pred'] <- 'away_defense_pred' | |
names(away_defense_stats)[names(away_defense_stats) == 'total_home_score'] <- 'away_defense_actual' | |
defense_predictions_2020 <- merge(home_defense_stats, away_defense_stats, by.x="home_team", by.y="away_team") | |
defense_predictions_2020 <- defense_predictions_2020 %>% | |
mutate(games = home_games + away_games, | |
total_d_pred = defense_pred + away_defense_pred, | |
total_d_points = total_away_score + away_defense_actual) %>% | |
select(home_team, games, total_d_pred, total_d_points) | |
defense_predictions_2020 <- defense_predictions_2020 %>% | |
mutate(pred_d_ppg = total_d_pred / games, | |
actual_d_ppg = total_d_points / games, | |
pred_minus_actual = (pred_d_ppg - actual_d_ppg)) | |
final_predictions_2020 <- merge(home_pred_stats, away_pred_stats, by.x="home_team", by.y="away_team") | |
final_predictions_2020 <- final_predictions_2020 %>% | |
mutate(games = home_games + away_games, | |
total_pred = total_home_pred + total_away_pred, | |
total_points = total_home_score + total_away_score) %>% | |
select(home_team, games, total_pred, total_points) | |
final_predictions_2020 <- final_predictions_2020 %>% | |
mutate(pred_points_per_game = total_pred / games, | |
actual_points_per_game = total_points / games, | |
pred_minus_actual = (pred_points_per_game - actual_points_per_game)) | |
teams_colors_logos <- teams_colors_logos | |
names(final_predictions_2020)[names(final_predictions_2020) == 'home_team'] <- 'team' | |
final_predictions_2020 <- final_predictions_2020 %>% | |
left_join(teams_colors_logos, by = c('team' = 'team_abbr')) | |
final_predictions_2020 %>% | |
ggplot(aes(x = pred_points_per_game, y = actual_points_per_game)) + | |
geom_hline(yintercept = mean(final_predictions_2020$actual_points_per_game), color = "blue", linetype = "dashed", alpha=0.5) + | |
geom_vline(xintercept = mean(final_predictions_2020$pred_points_per_game), color = "blue", linetype = "dashed", alpha=0.5) + | |
geom_image(aes(image = team_logo_espn), asp = 16 / 9) + | |
stat_smooth(geom='line', alpha=0.6, se=FALSE, method='lm')+ | |
labs(x = "Average Post-Game Forecasted Points", | |
y = "Average Actual Points Per Game", | |
title = "Which NFL Teams Have Been Predicted to Score \n More/Less Points than They Actually Did", | |
subtitle = "The predicted points are determined using advanced statistics from each game, weeks 1-13", | |
caption = "Graph by Tej Seth | @mfbanalytics") + | |
theme_bw() + | |
theme( | |
aspect.ratio = 9 / 16, | |
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"), | |
plot.subtitle = element_text(hjust = 0.5) | |
) + | |
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
annotate("text", x = c(28, 22), y = c(20, 28), | |
label = c("Scored less \n than expected", | |
"Scored more \n than expected"), color = "blue") | |
ggsave('model3_13.png', dpi=300) | |
###################################################################### | |
ggplot(home_preds, aes(x=home_prediction, y=home_score)) + | |
geom_point() + | |
geom_smooth() + | |
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
theme_bw() + | |
labs(title = "Home Team's Actual Score (Y-Axis) Vs. Modeled Score (X-Axis)", | |
x = "Modeled Score", | |
y = "Actual Score") + | |
theme( | |
plot.title = element_text(size = 14, hjust = 0.5, face = "bold") | |
) | |
ggsave('model2.png', dpi=300) | |
ggplot(home_preds, aes(x=prediction_minus_actual)) + | |
geom_histogram(fill = "dark green", binwidth = 1) + | |
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
theme_bw() + | |
geom_vline(xintercept = 0, color = "black") + | |
geom_vline(xintercept = -4.2, color = "red", linetype = "dashed", alpha = 0.6) + | |
geom_vline(xintercept = 3.9, color = "red", linetype = "dashed", alpha = 0.6) + | |
labs(title = "Home Team's Modeled Score Vs. Actual Score", | |
x = "Model's Score - Actual Score", | |
y = "Count") + | |
theme( | |
plot.title = element_text(size = 14, hjust = 0.5, face = "bold") | |
) | |
ggsave('model1.png', dpi=300) | |
###################################################################### | |
week13 <- predictions_2020 %>% | |
filter(week == 13) | |
home_week13 <- week13 %>% | |
group_by(home_team) %>% | |
summarize(forecast = home_prediction, | |
score = home_score) | |
away_week13 <- week13 %>% | |
group_by(away_team) %>% | |
summarize(forecast = away_prediction, | |
score = away_score) | |
names(home_week13)[names(home_week13) == 'home_team'] <- 'team' | |
names(away_week13)[names(away_week13) == 'away_team'] <- 'team' | |
all_week13 <- rbind(home_week13, away_week13) | |
all_week13 <- all_week13 %>% | |
left_join(teams_colors_logos, by = c('team' = 'team_abbr')) | |
all_week13 %>% | |
ggplot(aes(x = forecast, y = score)) + | |
geom_hline(yintercept = mean(all_week12$score), color = "blue", linetype = "dashed", alpha=0.5) + | |
geom_vline(xintercept = mean(all_week12$forecast), color = "blue", linetype = "dashed", alpha=0.5) + | |
geom_image(aes(image = team_logo_espn), asp = 16 / 9) + | |
stat_smooth(geom='line', alpha=0.6, se=FALSE, method='lm')+ | |
labs(x = "Post-Game Forecasted Points", | |
y = "Actual Points", | |
title = "NFL Week 13 Post-Game Forecasted Points ", | |
subtitle = "The forecasted points are determined using advanced in-game statistics", | |
caption = "Graph and Model by Tej Seth | @mfbanalytics") + | |
theme_bw() + | |
theme( | |
aspect.ratio = 9 / 16, | |
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"), | |
plot.subtitle = element_text(hjust = 0.5) | |
) + | |
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) + | |
annotate("text", x = c(30, 15), y = c(15, 30), | |
label = c("Scored less \n than expected", | |
"Scored more \n than expected"), color = "blue") | |
ggsave('week13_forecast.png', dpi=300) | |
library(tidyverse) | |
library(nflfastR) | |
library(ggplot2) | |
library(dplyr) | |
library(hrbrthemes) | |
library(ggrepel) | |
library(ggimage) | |
library(gt) | |
o_and_d_forecast <- merge(final_predictions_2020, defense_predictions_2020, by.x='team', by.y='home_team') | |
o_and_d_forecast <- o_and_d_forecast %>% | |
select(team, pred_points_per_game, pred_d_ppg, team_nick, team_logo_espn) | |
o_and_d_forecast %>% | |
ggplot(aes(x = pred_points_per_game, y = pred_d_ppg)) + | |
geom_hline(yintercept = mean(o_and_d_forecast$pred_d_ppg), color = "blue", linetype = "dashed", alpha=0.5) + | |
geom_vline(xintercept = mean(o_and_d_forecast$pred_points_per_game), color = "blue", linetype = "dashed", alpha=0.5) + | |
geom_image(aes(image = team_logo_espn), asp = 16 / 9) + | |
geom_abline(slope = -1.5, intercept = c(0, 3, 6, 9, 12, 15, 18, 21, 24), alpha = .4) + | |
labs(x = "Average Offensive Forecasted Points", | |
y = "Average Defensive Forecasted Points", | |
title = "NFL Tiers Based on Post-Game Forecasted Points", | |
subtitle = "The forecasted points are determined using advanced statistics from each game, weeks 1-13", | |
caption = "Graph by Tej Seth | @mfbanalytics") + | |
theme_bw() + | |
theme( | |
aspect.ratio = 9 / 16, | |
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"), | |
plot.subtitle = element_text(hjust = 0.5) | |
) + | |
scale_y_reverse(breaks = scales::pretty_breaks(n = 10)) + | |
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) | |
ggsave('tiers_13.png', dpi=300) | |
week_13_guide <- week.13.betting.guide %>% | |
gt() %>% | |
tab_header(title = html("<strong>NFL Week 14 Betting Guide</strong>"), | |
subtitle = md("Only responsible for the bets you win, not responsible for the bets you lose")) %>% | |
cols_label(date_time = "Date and Time", away_team = "Away Team", | |
proj_a_points = "Projected Points", home_team = "Home Team", proj_home_points = "Projected Points", | |
spread = "Vegas Spread", team_to_bet = "Team to Bet", bet_str = "Bet Strength", | |
vegas_over_under = "Over/Under", total_proj_points = "Total Projected Points") %>% | |
data_color( | |
columns = vars(bet_str), | |
colors = scales::col_factor( | |
palette = c( | |
"dodgerblue2","navy", "skyblue1"), | |
domain = c("Weak","Medium", "Strong"))) %>% | |
data_color( | |
columns = vars(date_time), | |
colors = scales::col_factor( | |
palette = c( | |
"grey20","lightblue1", "lightblue2", "lightblue3", "lightblue4", "plum1"), | |
domain = c("Thursday, 8:20 PM","Sunday, 1:00 PM", "Sunday, 4:05 PM", | |
"Sunday, 4:25 PM", "Sunday, 8:20 PM", "Monday, 8:15 PM"))) %>% | |
tab_source_note(md("Model and table by Tej Seth: Using weeks 1-13 of a linear regression forecasted points model, | |
which is modeled based on advanced post-game statistics, | |
this table attempts to project the score to each NFL game for week 14")) %>% | |
text_transform(locations = cells_body(vars(away_team)), | |
fn = function(x) { | |
web_image(url = week.13.betting.guide$away_team, | |
height = px(40))}) %>% | |
text_transform(locations = cells_body(vars(home_team)), | |
fn = function(x) { | |
web_image(url = week.13.betting.guide$home_team, | |
height = px(40))}) %>% | |
text_transform(locations = cells_body(vars(team_to_bet)), | |
fn = function(x) { | |
web_image(url = week.13.betting.guide$team_to_bet, | |
height = px(40))}) %>% | |
data_color( | |
columns = vars(total_proj_points), | |
colors = scales::col_numeric( | |
palette = c( | |
"indianred1","lightgoldenrod1", "darkolivegreen1"), | |
domain = c(42,57))) %>% | |
cols_align( | |
align = "center") | |
week_13_guide | |
gtsave(week_13_guide, "week_13_guide.png") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment