Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
library(tidyverse)
library(dplyr)
library(na.tools)
library(ggimage)
library(nflfastR)
library(ggrepel)
library(ggimage)
library(ggtext)
library(mgcv)
library(scales)
library(ggforce)
library(gt)
library(remotes)
seasons <- 2013:2020
pbp <- purrr::map_df(seasons, function(x) {
readRDS(
url(
glue::glue("https://raw.githubusercontent.com/guga31bb/nflfastR-data/master/data/play_by_play_{x}.rds")
)
)
})
pbp_rp <- pbp %>%
filter(!is_na(epa), play_type=="no_play" | play_type=="pass" | play_type=="run")
pbp_rp <- pbp_rp %>%
mutate(
pass = if_else(str_detect(desc, "( pass)|(sacked)|(scramble)"), 1, 0),
rush = if_else(str_detect(desc, "(left end)|(left tackle)|(left guard)|(up the middle)|(right guard)|(right tackle)|(right end)") & pass == 0, 1, 0),
success = ifelse(epa>0, 1 , 0)
)
pbp_rp <- pbp_rp %>% filter(pass==1 | rush==1)
pbp_rp <- pbp_rp %>%
mutate(season = substr(old_game_id, 1, 4))
pbp_rp <- pbp_rp %>%
mutate(
posteam = case_when(
posteam == 'OAK' ~ 'LV',
posteam == 'SD' ~ 'LAC',
posteam == 'STL' ~ 'LA',
TRUE ~ posteam
)
)
pbp_rp <- pbp_rp %>%
mutate(
defteam = case_when(
defteam == 'OAK' ~ 'LV',
defteam == 'SD' ~ 'LAC',
defteam == 'STL' ~ 'LA',
TRUE ~ defteam
)
)
run_epa <- pbp_rp %>%
filter(qb_dropback == 0) %>%
filter(qb_scramble == 0) %>%
filter(rush_attempt==1) %>%
group_by(posteam, season) %>%
summarize(rush_epa = mean(epa, na.rm=T))
def_epa <- pbp_rp %>%
group_by(defteam, season) %>%
summarize(def_epa = mean(epa, na.rm = T))
names(run_epa)[names(run_epa) == "posteam"] <- "team"
names(def_epa)[names(def_epa) == "defteam"] <- "team"
help_epa <- merge(run_epa, def_epa, by = c("team", "season"))
help_epa <- help_epa %>%
mutate(total_help = rush_epa - def_epa) %>%
arrange(total_help)
standings <- read_csv("http://www.habitatring.com/standings.csv")
standings <- standings %>%
mutate(
team = case_when(
team == 'OAK' ~ 'LV',
team == 'SD' ~ 'LAC',
team == 'STL' ~ 'LA',
TRUE ~ team
)
)
filtered_standings <- standings %>%
select(season, team, wins, losses, ties, pct) %>%
filter(season >= 2013)
filtered_standings <- filtered_standings %>%
mutate(total_wins = ifelse(ties==1, wins + 0.5, wins)) %>%
select(season, team, total_wins)
#Download the CSV and fix 2020 wins/losses and re-upload
filtered_standings$season <- as.numeric(filtered_standings$season)
help_epa$season <- as.numeric(help_epa$season)
standings_with_epa <- filtered_standings %>%
left_join(help_epa, by = c("team", "season"))
ggplot(standings_with_epa, aes(x=total_help, y=total_wins)) +
geom_point() +
geom_abline()
#Download standings_with_epa to manually add PFF grades
write.csv(standings_with_epa, "standings_with_epa2.csv")
every_year_help <- standings_with_epa %>%
group_by(team) %>%
summarize(final_help = mean(total_help)) %>%
arrange(desc(final_help)) %>%
filter(!is.na(final_help))
qb_dropbacks <- pbp_rp %>%
filter(qb_dropback == 1)
mean(qb_dropbacks$epa) #0.05
#Upload CSV with PFF grades now included
pwaa_with_teams <- read.csv("~/Downloads/pwaa_with_teams.csv")
pwaa_no_teams <- pwaa_with_teams %>%
select(-season, -team)
pwaa_no_teams %>%
cor(use="complete.obs") %>%
round(2)
fit <- lm(total_wins ~ rush_epa + def_epa + pass_block_grade + receiving_grade, data = pwaa_with_teams)
summary(fit)
preds <- predict(fit, pwaa_with_teams) %>%
as_tibble() %>%
rename(proj_wins = value) %>%
round(1) %>%
bind_cols(
pwaa_with_teams %>% select(team, season, total_wins)
)
preds <- preds %>%
mutate(pwaa = total_wins - proj_wins)
teams_colors_logos <- teams_colors_logos
preds2 <- preds %>%
left_join(teams_colors_logos, by = c('team' = 'team_abbr'))
preds2 <- preds2 %>%
select(proj_wins, team, season, total_wins, pwaa, team_color, team_color2, team_logo_espn)
preds3 <- preds2 %>%
left_join(headshots, by = c('season', 'team'))
preds_2020 <- preds3 %>%
filter(season == 2020) %>%
arrange(desc(pwaa))
preds_2019 <- preds2 %>%
filter(season == 2019) %>%
arrange(desc(pwaa))
preds_2018 <- preds2 %>%
filter(season == 2018) %>%
arrange(desc(pwaa))
all_pred_years <- preds %>%
group_by(team) %>%
summarize(mean_proj_wins = mean(proj_wins),
mean_wins = mean(total_wins),
mean_pwaa = mean(pwaa)) %>%
arrange(desc(mean_proj_wins))
remotes::install_github("jthomasmock/espnscrapeR")
qb_epa <- qb_dropbacks %>%
group_by(passer, season, posteam) %>%
summarize(plays = n(),
qb_epa = mean(epa, na.rm=T)) %>%
filter(plays > 50)
qb_epa_top_plays <- qb_epa %>%
group_by(season, posteam) %>%
top_n(1, plays)
qbr_data_20 <- espnscrapeR::get_nfl_qbr(2020)
qbr_data_19 <- espnscrapeR::get_nfl_qbr(2019)
qbr_data_18 <- espnscrapeR::get_nfl_qbr(2018)
qbr_data_17 <- espnscrapeR::get_nfl_qbr(2017)
qbr_data_16 <- espnscrapeR::get_nfl_qbr(2016)
qbr_data_15 <- espnscrapeR::get_nfl_qbr(2015)
qbr_data_14 <- espnscrapeR::get_nfl_qbr(2014)
qbr_data_13 <- espnscrapeR::get_nfl_qbr(2013)
qbr_data_all <- rbind(qbr_data_13, qbr_data_14, qbr_data_15, qbr_data_16,
qbr_data_17, qbr_data_18, qbr_data_19, qbr_data_20) %>%
select(season, team, short_name, headshot_href)
qbr_data_all <- qbr_data_all %>%
mutate(
team = case_when(
team == 'OAK' ~ 'LV',
team == 'SD' ~ 'LAC',
team == 'STL' ~ 'LA',
TRUE ~ team
)
)
names(qbr_data_all)[names(qbr_data_all) == "short_name"] <- "passer"
names(qb_epa_top_plays)[names(qb_epa_top_plays) == "posteam"] <- "team"
qb_epa_top_plays$season <- as.integer(qb_epa_top_plays$season)
qbr_data_all$season <- as.integer(qbr_data_all$season)
qbr_data_all <- qbr_data_all %>%
distinct(passer, .keep_all = TRUE) %>%
select(-season, -team)
qb_epa_headshots <- qb_epa_top_plays %>%
left_join(qbr_data_all, by = "passer")
write.csv(qb_epa_headshots, "qb_epa_headshots.csv")
write.csv(qbr_data_all, "qbr_data_all.csv")
headshots <- read.csv("~/Downloads/headshots.csv")
################################# G R A P H S ################################
preds_2020 <- preds_2020 %>%
mutate(rank = row_number())
asp_ratio <- 1.618
link_to_img <- function(x, width = 50) {
glue::glue("<img src='{x}' width='{width}'/>")
}
bar_plot <- preds_2020 %>%
mutate(label = link_to_img(headshot_href),
rank = as.integer(rank)) %>%
ggplot() +
geom_col(
aes(
x = rank, y = pwaa,
fill = team_color, color = team_color2
),
width = 0.4
) +
scale_color_identity(aesthetics = c("fill", "color")) +
geom_hline(yintercept = 0, color = "black", size = 1) +
theme_minimal() +
scale_x_continuous(breaks = c(1, seq(5, 30, by = 5)), limits = c(0.5, 34)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
labs(x = NULL,
y = "PWAA\n",
title = "Passer Wins Above Average (PWAA) | 2020 Season",
subtitle = "PWAA is a linear regression model that simulates an average quarterback with every team's run game, defense, pass blocking and receiving",
caption = "By Tej Seth | @mfbanalytics") +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5),
axis.text = element_text(size = 14, face = "bold"),
axis.title.y = element_text(size = 16, face = "bold")
)
bar_plot
qb_col_img <- bar_plot +
geom_image(
aes(
x = rank, y = pwaa,
image = headshot_href
),
size = 0.06
)
qb_col_img
ggsave(
"pwaa-1.png", qb_col_img,
height = 10, width = 16, dpi = "retina"
)
#################################################
preds_2020 %>%
ggplot() +
geom_link(
mapping = aes(x = proj_wins, y = rank, xend = total_wins, yend = rank, size = 2, color = team_color)
) +
theme_bw() +
scale_colour_identity() +
geom_image(aes(x = proj_wins, y = rank, image = team_logo_espn), size = 0.04, asp = 16/9) +
geom_image(aes(x = total_wins, y = rank, image = headshot_href), size = 0.04, asp = 16/9) +
labs(
x = "Team Wins",
y = "",
title = "Each Team's Passer Wins Over Average",
subtitle = "The team logo is for a team's wins with an average quarterback and the face of each quarterback is how many wins they added",
caption = "By Tej Seth | @mfbanalytics using code from @benbbaldwin"
) +
theme(
plot.title = element_markdown(hjust = 0.5, size = 20, face = "bold"),
plot.subtitle = element_markdown(hjust = 0.5, size = 12),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
legend.position = "none",
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.background = element_blank(),
panel.border= element_blank()
) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_reverse(breaks = scales::pretty_breaks(n = 10))
ggsave('pwaa-2.png', dpi=300, height=9*.8, width=16*.8)
###########################################################
scatter_plot <- preds_2020 %>%
mutate(label = link_to_img(headshot_href),
rank = as.integer(rank)) %>%
ggplot() +
geom_smooth(aes(x = pwaa, y = qb_epa), method = "lm", color = "grey") +
ggrepel::geom_text_repel(
aes(x = pwaa, y = qb_epa, label = passer),
box.padding = 0.5, fontface = "bold", size = 6
) +
geom_point(
aes(x = pwaa, y = qb_epa, size = plays, fill = team_color, color = team_color2),
shape = 21
) +
scale_color_identity(aesthetics = c("fill", "color")) +
scale_size(name = "Plays") +
theme_minimal() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
labs(x = "Passer Wins Above Average",
y = "QB's EPA/Play",
title = "PWAA and EPA/Play Are Correlated",
subtitle = "PWAA is a linear regression model that simulates an average quarterback with every team's run game, defense, pass blocking and receiving",
caption = "By Tej Seth | @mfbanalytics using code from @thomas_mock") +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5),
axis.text = element_text(size = 14),
axis.title.y = element_text(size = 14)
)
scatter_plot
ggsave(
"pwaa-3.png", scatter_plot,
height = 10, width = 16, dpi = "retina"
)
############################################################
all_pred_years <- all_pred_years %>%
left_join(teams_colors_logos, by = c('team' = 'team_abbr'))
all_pred_years %>%
ggplot(aes(x = mean_proj_wins, y = mean_wins)) +
geom_hline(yintercept = mean(all_pred_years$mean_wins), color = "blue", linetype = "dashed", alpha=0.5) +
geom_vline(xintercept = mean(all_pred_years$mean_proj_wins), color = "blue", linetype = "dashed", alpha=0.5) +
geom_image(aes(image = team_logo_espn), size = 0.05, asp = 16 / 9) +
stat_smooth(geom='line', alpha=0.5, se=FALSE, method='lm')+
labs(x = "Wins With Average Quarterback Per Year",
y = "Actual Wins Per Year",
title = "How Each Team Has Peformed Since 2013",
caption = "By Tej Seth | @mfbanalytics") +
theme_bw() +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 14, hjust = 0.5, face = "bold")
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
annotate("text", x = 9.8, y = 9.2, label = "Good Franchise \n Good QB Play", color = "blue") +
annotate("text", x = 9.8, y = 6, label = "Good Franchise \n Bad QB Play", color = "blue") +
annotate("text", x = 6, y = 7, label = "Bad Franchise \n Bad QB Play", color = "blue") +
annotate("text", x = 6, y = 9, label = "Bad Franchise \n Good QB Play", color = "blue")
ggsave('pwaa-4.png')
#####################################################################
every_qb_pef <- merge(headshots, preds, by = c("season", "team"))
Mode <- function(x) {
ux <- unique(x)
ux[which.max(tabulate(match(x, ux)))]
}
qb_pwaa_stats <- every_qb_pef %>%
group_by(passer, headshot_href) %>%
summarize(seasons = n(),
mean_pwaa = mean(pwaa, na.rm = T),
total_pwaa = sum(pwaa, na.rm = T),
mean_proj_wins = mean(proj_wins, na.rm = T),
most_freq_team = Mode(team)) %>%
filter(seasons > 2) %>%
arrange(desc(total_pwaa))
qb_pwaa_stats <- qb_pwaa_stats %>%
mutate(rank = row_number())
qb_pwaa_stats <- qb_pwaa_stats %>%
left_join(teams_colors_logos, by = c('most_freq_team' = 'team_abbr'))
side_plot <- qb_pwaa_stats %>%
mutate(label = link_to_img(headshot_href),
rank = as.integer(rank)) %>%
ggplot() +
geom_col(
aes(
x = total_pwaa, y = fct_reorder(passer, total_pwaa),
fill = team_color, color = team_color2
),
width = 0.4
) +
scale_color_identity(aesthetics = c("fill", "color")) +
geom_vline(xintercept = 0, color = "black", size = 1) +
theme_minimal() +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
labs(x = "Total Passer Wins Above Average",
y = "",
title = "Total PWAA by Each Quarterback Since 2013",
subtitle = "Minimum of 3 seasons played",
caption = "By Tej Seth | @mfbanalytics") +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5),
axis.text = element_text(size = 8),
axis.title.y = element_text(size = 14)
)
side_plot
ggsave("pwaa-5.png")
#####################################################################
top_15 <- every_qb_pef %>% arrange(desc(pwaa)) %>% slice(1:15)
top_15 <- top_15 %>%
left_join(teams_colors_logos, by = c('team' = 'team_abbr'))
top_15 <- top_15 %>%
mutate(rank = row_number())
top_15 <- top_15 %>%
mutate(text_label = (total_wins + proj_wins) / 2)
top_15 %>%
ggplot() +
geom_link(
mapping = aes(x = proj_wins, y = rank, xend = total_wins, yend = rank, size = 2, color = team_color)
) +
theme_bw() +
scale_colour_identity() +
geom_image(aes(x = proj_wins, y = rank, image = team_logo_espn), size = 0.04, asp = 16/9) +
geom_image(aes(x = total_wins, y = rank, image = headshot_href), size = 0.04, asp = 16/9) +
geom_text(aes(x = text_label, y = rank, label = season), color = "white") +
labs(
x = "Team Wins",
y = "",
title = "The Top 15 Seasons in PWAA Since 2013",
subtitle = "The team logo is for a team's wins with an average quarterback and the face of each quarterback is how many wins they added",
caption = "By Tej Seth | @mfbanalytics using code from @benbbaldwin"
) +
theme(
plot.title = element_markdown(hjust = 0.5, size = 20, face = "bold"),
plot.subtitle = element_markdown(hjust = 0.5, size = 12),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
legend.position = "none",
panel.grid.major.y = element_blank(),
panel.grid.minor.y = element_blank(),
panel.background = element_blank(),
panel.border= element_blank()
) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_reverse(breaks = scales::pretty_breaks(n = 10))
ggsave('pwaa-6.png', dpi=300, height=9*.8, width=16*.8)
#########################################################
tompa_bay <- every_qb_pef %>%
filter(team == "TB")
tompa_bay <- tompa_bay %>%
left_join(teams_colors_logos, by = c('team' = 'team_abbr'))
tompa_bay %>%
ggplot() +
theme_bw() +
scale_colour_identity() +
geom_line(aes(x=season, y=proj_wins, color = team_color)) +
geom_line(aes(x=season, y=total_wins, color = team_color2)) +
geom_image(aes(x=season, y=proj_wins, image = team_logo_espn), size = 0.05, asp = 16/9) +
geom_image(aes(x=season, y=total_wins, image = headshot_href), size = 0.05, asp = 16/9) +
labs(
x = "Season",
y = "Wins",
title = "2020 Brady is Average But That's All Tampa Bay Needed",
subtitle = "A team's logo is how many wins they'd have with an average quarterback and the quarterback's face is how many wins they actually got to",
caption = "By Tej Seth | @mfbanalytics"
) +
theme(
panel.grid.minor = element_blank(),
plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
plot.subtitle = element_text(size = 10, hjust = 0.5),
axis.text = element_text(size = 8),
axis.title.y = element_text(size = 14)
) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10))
ggsave('pwaa-7.png', dpi = 300)
#########################################################
tab_data <- preds_2020 %>%
mutate(RK = rank(desc(pwaa)),
RK = as.integer(RK)) %>%
select(RK, passer, headshot_href, proj_wins, total_wins, pwaa)
tab_function <- function(data, ...){
data %>%
gt() %>%
text_transform(
locations = cells_body(vars(headshot_href)),
fn = function(x){
web_image(
url = x,
height = px(30)
)
}
) %>%
cols_label(
RK = "Rank",
passer = "Quarterback",
headshot_href = "",
proj_wins = "Avg. QB Wins",
total_wins = "Actual Wins",
pwaa = "PWAA") %>%
data_color(
columns = vars(pwaa),
colors = scales::col_numeric(
palette = c("#af8dc3", "#f7f7f7", "#7fbf7b"),
domain = c(-5, 5)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = vars(RK, passer)
)
) %>%
tab_options(
column_labels.background.color = "white",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
table.border.bottom.width = px(3),
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
column_labels.border.bottom.width = px(3),
column_labels.border.bottom.color = "black",
data_row.padding = px(3),
source_notes.font.size = 12,
table.font.size = 16,
heading.align = "left",
...
) %>%
opt_table_font(
font = list(
default_fonts()
)
)
}
gt_tab1 <- tab_data %>%
slice(1:16) %>%
tab_function()
gt_tab1
gtsave(gt_tab1, "gt-tab1.png")
gt_tab2 <- tab_data %>%
slice(17:32) %>%
tab_function() %>%
tab_style(
style = cell_borders(
sides = "left",
color = "black",
weight = px(3)
),
locations =
list(
cells_body(
columns = 1
),
cells_column_labels(1)
)
)
gtsave(gt_tab2, "gt-tab2.png")
img1 <- magick::image_read("gt-tab1.png")
img2 <- magick::image_read("gt-tab2.png")
img3 <- magick::image_append(c(img1, img2))
img3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment