Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@tejseth
Created December 10, 2020 15:13
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tejseth/4653275d30e9cf50e9ffcacd07010858 to your computer and use it in GitHub Desktop.
Save tejseth/4653275d30e9cf50e9ffcacd07010858 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(nflfastR)
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(ggrepel)
library(ggimage)
games <- readRDS(url("http://www.habitatring.com/games.rds"))
games2 <- games %>%
filter(season > 2009)
seasons <- 2010:2020
pbp <- map_df(seasons, function(x) {
readRDS(
url(
paste0("https://raw.githubusercontent.com/guga31bb/nflfastR-data/master/data/play_by_play_",x,".rds")
)
) %>%
filter(rush == 1 | pass == 1, week <= 17, !is.na(epa), !is.na(posteam), posteam != "")
})
results <- games2 %>%
filter(game_type == 'REG') %>%
select(game_id, season, week, home_team, home_score, away_team, away_score)
offense <- pbp %>%
group_by(game_id, posteam, season, week) %>%
summarize(plays = n(),
off_epa_play = mean(epa),
off_total_epa = sum(epa),
off_success_rate = mean(success),
explosive_play_rate = sum(epa>0.75) / plays,
bad_play_rate = sum(epa < -0.6)/ plays,
avg_wpa = mean(wpa, na.rm=T),
series_success = mean(series_success),
cpoe = mean(cpoe, na.rm=T),
avg_yardline = mean(100 - (yardline_100)))
results <- results %>%
mutate(
home_team = case_when(
home_team == 'OAK' ~ 'LV',
home_team == 'SD' ~ 'LAC',
home_team == 'STL' ~ 'LA',
TRUE ~ home_team
)
)
results <- results %>%
mutate(
away_team = case_when(
away_team == 'OAK' ~ 'LV',
away_team == 'SD' ~ 'LAC',
away_team == 'STL' ~ 'LA',
TRUE ~ away_team
)
)
defense <- pbp %>%
group_by(game_id, defteam, season, week) %>%
summarize(plays = n(),
def_good_play_rate = (sum(epa < -0.6)/plays))
home_results <- results %>%
select(-away_team, -away_score)
home_results <- home_results %>%
left_join(offense, by = c('game_id', 'season', 'week', 'home_team' = 'posteam'))
home_results <- home_results %>%
left_join(defense, by = c('game_id', 'season', 'week', 'home_team' = 'defteam'))
home_results %>%
select(-game_id, -season, -week, -home_team) %>%
cor(use="complete.obs") %>%
round(2)
names(home_results)[names(home_results) == 'plays'] <- 'home_plays'
names(home_results)[names(home_results) == 'off_epa_play'] <- 'home_off_epa_play'
names(home_results)[names(home_results) == 'off_total_epa'] <- 'home_off_total_epa'
names(home_results)[names(home_results) == 'off_success_rate'] <- 'home_off_success_rate'
names(home_results)[names(home_results) == 'off_sd_epa'] <- 'home_off_sd_epa'
names(home_results)[names(home_results) == 'explosive_play_rate'] <- 'home_explosive_play_rate'
names(home_results)[names(home_results) == 'bad_play_rate'] <- 'home_bad_play_rate'
names(home_results)[names(home_results) == 'def_good_play_rate.x'] <- 'home_def_good_play_rate'
away_results <- results %>%
select(-home_team, -home_score)
away_results <- away_results %>%
left_join(offense, by = c('game_id', 'season', 'week', 'away_team' = 'posteam'))
home_results <- home_results %>%
left_join(defense, by = c('game_id', 'season', 'week', 'home_team' = 'defteam'))
away_results %>%
select(-game_id, -season, -week, -away_team) %>%
cor(use="complete.obs") %>%
round(2)
names(away_results)[names(away_results) == 'plays'] <- 'away_plays'
names(away_results)[names(away_results) == 'off_epa_play'] <- 'away_off_epa_play'
names(away_results)[names(away_results) == 'off_total_epa'] <- 'away_off_total_epa'
names(away_results)[names(away_results) == 'off_success_rate'] <- 'away_off_success_rate'
names(away_results)[names(away_results) == 'off_sd_epa'] <- 'away_off_sd_epa'
names(away_results)[names(away_results) == 'explosive_play_rate'] <- 'away_explosive_play_rate'
names(away_results)[names(away_results) == 'bad_play_rate'] <- 'away_bad_play_rate'
home_away_results <- merge(home_results, away_results, by = c('game_id', 'season', 'week'))
home_fit <- lm(home_score ~
home_off_epa_play + home_off_total_epa + home_explosive_play_rate + home_bad_play_rate, data = home_results)
summary(home_fit)
home_preds <- predict(home_fit, home_results) %>%
as_tibble() %>%
rename(home_prediction = value) %>%
round(1) %>%
bind_cols(
home_results) %>%
select(game_id, season, week, home_team, home_prediction, home_score, home_off_epa_play) %>%
mutate(prediction_minus_actual = home_prediction - home_score)
away_fit <- lm(away_score ~
away_off_epa_play + away_off_total_epa + away_explosive_play_rate + away_bad_play_rate, data = away_results)
summary(away_fit)
away_preds <- predict(away_fit, away_results) %>%
as_tibble() %>%
rename(away_prediction = value) %>%
round(1) %>%
bind_cols(
away_results) %>%
select(game_id, season, week, away_team, away_prediction, away_score, away_off_epa_play) %>%
mutate(prediction_minus_actual = away_prediction - away_score)
home_away_preds <- merge(home_preds, away_preds, by = c("game_id", "season", "week"))
predictions_2020 <- home_away_preds %>%
filter(season == 2020) %>%
select(-home_off_epa_play, -away_off_epa_play) %>%
filter(!is.na(home_score))
names(predictions_2020)[names(predictions_2020) == 'prediction_minus_actual.x'] <- 'home_pred_minus_actual'
names(predictions_2020)[names(predictions_2020) == 'prediction_minus_actual.y'] <- 'away_pred_minus_actual'
home_pred_stats <- predictions_2020 %>%
group_by(home_team) %>%
summarize(home_games = n(),
total_home_pred = sum(home_prediction),
total_home_score = sum(home_score),
total_home_pred_minus_actual = sum(home_pred_minus_actual))
away_pred_stats <- predictions_2020 %>%
group_by(away_team) %>%
summarize(away_games = n(),
total_away_pred = sum(away_prediction),
total_away_score = sum(away_score),
total_away_pred_minus_actual = sum(away_pred_minus_actual))
home_defense_stats <- predictions_2020 %>%
group_by(home_team) %>%
summarize(away_games = n(),
total_away_pred = sum(away_prediction),
total_away_score = sum(away_score))
names(home_defense_stats)[names(home_defense_stats) == 'total_away_pred'] <- 'defense_pred'
names(home_defense_stats)[names(home_defense_stats) == 'total_away_actual'] <- 'defense_actual'
away_defense_stats <- predictions_2020 %>%
group_by(away_team) %>%
summarize(home_games = n(),
total_home_pred = sum(home_prediction),
total_home_score = sum(home_score))
names(away_defense_stats)[names(away_defense_stats) == 'total_home_pred'] <- 'away_defense_pred'
names(away_defense_stats)[names(away_defense_stats) == 'total_home_score'] <- 'away_defense_actual'
defense_predictions_2020 <- merge(home_defense_stats, away_defense_stats, by.x="home_team", by.y="away_team")
defense_predictions_2020 <- defense_predictions_2020 %>%
mutate(games = home_games + away_games,
total_d_pred = defense_pred + away_defense_pred,
total_d_points = total_away_score + away_defense_actual) %>%
select(home_team, games, total_d_pred, total_d_points)
defense_predictions_2020 <- defense_predictions_2020 %>%
mutate(pred_d_ppg = total_d_pred / games,
actual_d_ppg = total_d_points / games,
pred_minus_actual = (pred_d_ppg - actual_d_ppg))
final_predictions_2020 <- merge(home_pred_stats, away_pred_stats, by.x="home_team", by.y="away_team")
final_predictions_2020 <- final_predictions_2020 %>%
mutate(games = home_games + away_games,
total_pred = total_home_pred + total_away_pred,
total_points = total_home_score + total_away_score) %>%
select(home_team, games, total_pred, total_points)
final_predictions_2020 <- final_predictions_2020 %>%
mutate(pred_points_per_game = total_pred / games,
actual_points_per_game = total_points / games,
pred_minus_actual = (pred_points_per_game - actual_points_per_game))
teams_colors_logos <- teams_colors_logos
names(final_predictions_2020)[names(final_predictions_2020) == 'home_team'] <- 'team'
final_predictions_2020 <- final_predictions_2020 %>%
left_join(teams_colors_logos, by = c('team' = 'team_abbr'))
final_predictions_2020 %>%
ggplot(aes(x = pred_points_per_game, y = actual_points_per_game)) +
geom_hline(yintercept = mean(final_predictions_2020$actual_points_per_game), color = "blue", linetype = "dashed", alpha=0.5) +
geom_vline(xintercept = mean(final_predictions_2020$pred_points_per_game), color = "blue", linetype = "dashed", alpha=0.5) +
geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
stat_smooth(geom='line', alpha=0.6, se=FALSE, method='lm')+
labs(x = "Average Post-Game Forecasted Points",
y = "Average Actual Points Per Game",
title = "Which NFL Teams Have Been Predicted to Score \n More/Less Points than They Actually Did",
subtitle = "The predicted points are determined using advanced statistics from each game, weeks 1-13",
caption = "Graph by Tej Seth | @mfbanalytics") +
theme_bw() +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
annotate("text", x = c(28, 22), y = c(20, 28),
label = c("Scored less \n than expected",
"Scored more \n than expected"), color = "blue")
ggsave('model3_13.png', dpi=300)
######################################################################
ggplot(home_preds, aes(x=home_prediction, y=home_score)) +
geom_point() +
geom_smooth() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
theme_bw() +
labs(title = "Home Team's Actual Score (Y-Axis) Vs. Modeled Score (X-Axis)",
x = "Modeled Score",
y = "Actual Score") +
theme(
plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
)
ggsave('model2.png', dpi=300)
ggplot(home_preds, aes(x=prediction_minus_actual)) +
geom_histogram(fill = "dark green", binwidth = 1) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
theme_bw() +
geom_vline(xintercept = 0, color = "black") +
geom_vline(xintercept = -4.2, color = "red", linetype = "dashed", alpha = 0.6) +
geom_vline(xintercept = 3.9, color = "red", linetype = "dashed", alpha = 0.6) +
labs(title = "Home Team's Modeled Score Vs. Actual Score",
x = "Model's Score - Actual Score",
y = "Count") +
theme(
plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
)
ggsave('model1.png', dpi=300)
######################################################################
week13 <- predictions_2020 %>%
filter(week == 13)
home_week13 <- week13 %>%
group_by(home_team) %>%
summarize(forecast = home_prediction,
score = home_score)
away_week13 <- week13 %>%
group_by(away_team) %>%
summarize(forecast = away_prediction,
score = away_score)
names(home_week13)[names(home_week13) == 'home_team'] <- 'team'
names(away_week13)[names(away_week13) == 'away_team'] <- 'team'
all_week13 <- rbind(home_week13, away_week13)
all_week13 <- all_week13 %>%
left_join(teams_colors_logos, by = c('team' = 'team_abbr'))
all_week13 %>%
ggplot(aes(x = forecast, y = score)) +
geom_hline(yintercept = mean(all_week12$score), color = "blue", linetype = "dashed", alpha=0.5) +
geom_vline(xintercept = mean(all_week12$forecast), color = "blue", linetype = "dashed", alpha=0.5) +
geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
stat_smooth(geom='line', alpha=0.6, se=FALSE, method='lm')+
labs(x = "Post-Game Forecasted Points",
y = "Actual Points",
title = "NFL Week 13 Post-Game Forecasted Points ",
subtitle = "The forecasted points are determined using advanced in-game statistics",
caption = "Graph and Model by Tej Seth | @mfbanalytics") +
theme_bw() +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
annotate("text", x = c(30, 15), y = c(15, 30),
label = c("Scored less \n than expected",
"Scored more \n than expected"), color = "blue")
ggsave('week13_forecast.png', dpi=300)
library(tidyverse)
library(nflfastR)
library(ggplot2)
library(dplyr)
library(hrbrthemes)
library(ggrepel)
library(ggimage)
library(gt)
o_and_d_forecast <- merge(final_predictions_2020, defense_predictions_2020, by.x='team', by.y='home_team')
o_and_d_forecast <- o_and_d_forecast %>%
select(team, pred_points_per_game, pred_d_ppg, team_nick, team_logo_espn)
o_and_d_forecast %>%
ggplot(aes(x = pred_points_per_game, y = pred_d_ppg)) +
geom_hline(yintercept = mean(o_and_d_forecast$pred_d_ppg), color = "blue", linetype = "dashed", alpha=0.5) +
geom_vline(xintercept = mean(o_and_d_forecast$pred_points_per_game), color = "blue", linetype = "dashed", alpha=0.5) +
geom_image(aes(image = team_logo_espn), asp = 16 / 9) +
geom_abline(slope = -1.5, intercept = c(0, 3, 6, 9, 12, 15, 18, 21, 24), alpha = .4) +
labs(x = "Average Offensive Forecasted Points",
y = "Average Defensive Forecasted Points",
title = "NFL Tiers Based on Post-Game Forecasted Points",
subtitle = "The forecasted points are determined using advanced statistics from each game, weeks 1-13",
caption = "Graph by Tej Seth | @mfbanalytics") +
theme_bw() +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 16, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(hjust = 0.5)
) +
scale_y_reverse(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10))
ggsave('tiers_13.png', dpi=300)
week_13_guide <- week.13.betting.guide %>%
gt() %>%
tab_header(title = html("<strong>NFL Week 14 Betting Guide</strong>"),
subtitle = md("Only responsible for the bets you win, not responsible for the bets you lose")) %>%
cols_label(date_time = "Date and Time", away_team = "Away Team",
proj_a_points = "Projected Points", home_team = "Home Team", proj_home_points = "Projected Points",
spread = "Vegas Spread", team_to_bet = "Team to Bet", bet_str = "Bet Strength",
vegas_over_under = "Over/Under", total_proj_points = "Total Projected Points") %>%
data_color(
columns = vars(bet_str),
colors = scales::col_factor(
palette = c(
"dodgerblue2","navy", "skyblue1"),
domain = c("Weak","Medium", "Strong"))) %>%
data_color(
columns = vars(date_time),
colors = scales::col_factor(
palette = c(
"grey20","lightblue1", "lightblue2", "lightblue3", "lightblue4", "plum1"),
domain = c("Thursday, 8:20 PM","Sunday, 1:00 PM", "Sunday, 4:05 PM",
"Sunday, 4:25 PM", "Sunday, 8:20 PM", "Monday, 8:15 PM"))) %>%
tab_source_note(md("Model and table by Tej Seth: Using weeks 1-13 of a linear regression forecasted points model,
which is modeled based on advanced post-game statistics,
this table attempts to project the score to each NFL game for week 14")) %>%
text_transform(locations = cells_body(vars(away_team)),
fn = function(x) {
web_image(url = week.13.betting.guide$away_team,
height = px(40))}) %>%
text_transform(locations = cells_body(vars(home_team)),
fn = function(x) {
web_image(url = week.13.betting.guide$home_team,
height = px(40))}) %>%
text_transform(locations = cells_body(vars(team_to_bet)),
fn = function(x) {
web_image(url = week.13.betting.guide$team_to_bet,
height = px(40))}) %>%
data_color(
columns = vars(total_proj_points),
colors = scales::col_numeric(
palette = c(
"indianred1","lightgoldenrod1", "darkolivegreen1"),
domain = c(42,57))) %>%
cols_align(
align = "center")
week_13_guide
gtsave(week_13_guide, "week_13_guide.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment