Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
library(tidyverse)
library(ggrepel)
library(ggimage)
library(ggtext)
library(mgcv)
library(scales)
library(ggforce)
library(nflfastR)
library(na.tools)
library(bayesboot)
library(corrgram)
library(GGally)
library(corrplot)
library(gt)
library(ggplot2)
library(viridis)
library(hrbrthemes)
proe_by_season <- pbp_rp %>%
filter(!is.na(pass_oe)) %>%
group_by(posteam, season) %>%
summarize(proe = mean(pass_oe, na.rm = T))
proe_by_season <- proe_by_season %>%
left_join(teams_colors_logos, by = c('posteam' = 'team_abbr'))
proe_by_season$season <- as.numeric(proe_by_season$season)
lions_proe <- proe_by_season %>%
filter(posteam == "DET")
proe_by_season %>%
ggplot(aes(x = season, y=proe)) +
geom_jitter(aes(y = proe, fill = team_color),
size = 6, width = 0.02, show.legend=FALSE, alpha=.5) +
theme_bw() +
geom_hline(yintercept = 0, color = "black", alpha=1.0) +
scale_color_identity(aesthetics = c("fill", "color")) +
geom_image(aes(image = team_logo_espn, x=season, y=proe),
size = 0.05, asp = 16/9, data = titans_proe) +
geom_line(data = titans_proe, aes(color = team_color)) +
labs(x = "Season",
y = "Pass Rate Over Expected",
title = "Each Team's Pass Rate Over Expected, 2014-2020",
subtitle = "The Titans have been passing less every year") +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 6))
ggsave("titans-1.png", height = 10, width = 16, dpi = "retina")
##########################################################################
play.by.play <- read.csv("~/Downloads/Play By Play.csv")
facet.grades <- read.csv("~/Downloads/Facet Grades.csv")
play.by.play <- play.by.play %>%
mutate(pass = ifelse(rps == "P", 1, 0),
run = ifelse(rps == "R", 1, 0))
play.by.play <- play.by.play %>%
filter(pass == 1 | run == 1)
pa_rates <- play.by.play %>%
filter(!is.na(EPA)) %>%
group_by(offense, season) %>%
summarize(plays = n(),
pa_rate = sum(play_action) / plays,
epa_per_play = sum(EPA) / plays)
pa_rates <- pa_rates %>%
left_join(teams_colors_logos, by = c('offense' = 'team_abbr'))
titans_pa <- pa_rates %>%
filter(offense == "TEN")
pa_rates %>%
ggplot(aes(x = season, y=pa_rate)) +
geom_jitter(aes(y = pa_rate, fill = team_color),
size = 6, width = 0.02, show.legend=FALSE, alpha=.5) +
theme_bw() +
geom_hline(yintercept = mean(pa_rates$pa_rate), color = "black", alpha=1.0) +
scale_color_identity(aesthetics = c("fill", "color")) +
geom_image(aes(image = team_logo_espn, x=season, y=pa_rate),
size = 0.05, asp = 16/9, data = titans_pa) +
geom_line(data = titans_pa, aes(color = team_color)) +
labs(x = "Season",
y = "Play Action Rate",
title = "Each Team's Play Action Rate, 2014-2020",
subtitle = "The Titans have been using more play-action passes every year") +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 6))
ggsave("titans-2.png", height = 10, width = 16, dpi = "retina")
##########################################################################
play.by.play2 <- play.by.play %>%
mutate(score_diff = off_score - def_score) %>%
filter(score_diff >= -10)
play.by.play2 <- play.by.play2 %>%
filter(score_diff < 11)
ed_run_rates <- play.by.play2 %>%
filter(!is.na(EPA)) %>%
filter(down <= 2) %>%
group_by(offense, season, week) %>%
summarize(total = n(),
run_rate = sum(run==1) / n())
pa_games <- play.by.play2 %>%
filter(!is.na(EPA)) %>%
filter(play_action == 1) %>%
group_by(offense, season, week) %>%
summarize(pa_passes = n(),
pa_epa = mean(EPA))
pa_games <- pa_games %>%
left_join(ed_run_rates)
pa_games <- pa_games %>%
filter(pa_passes > 4)
pa_games <- pa_games %>%
left_join(teams_colors_logos, by = c("offense" = "team_abbr"))
titans_pa_games <- pa_games %>%
filter(offense == "TEN")
ggplot(titans_pa_games, aes(x=run_rate, y=pa_epa, color = team_color)) +
geom_jitter(aes(y = pa_epa, fill = team_color),
size = 6, width = 0.02, show.legend=FALSE, alpha=.5) +
scale_color_identity(aesthetics = c("fill", "color")) +
stat_smooth(geom='line', alpha=1.0, se=FALSE, method='lm') +
theme_bw() +
labs(x = "Early Down Run Rate",
y = "Play Action EPA/Play",
title = "The Titans Play Action EPA/Play Compared to Their Run Rate",
subtitle = "Score differential is between -10 and 10") +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
annotate("text", x = 0.67, y = 0.30, label = "R^2 = 0.00", size = 6)
ggplot(pa_games, aes(x=run_rate, y=pa_epa, color = "orange")) +
geom_jitter(aes(y = pa_epa, fill = "orange"),
size = 2, width = 0.02, show.legend=FALSE, alpha=.5) +
stat_smooth(geom='line', alpha=1.0, se=FALSE, method='lm', color = "black") +
theme_bw() +
labs(x = "Early Down Run Rate",
y = "Play Action EPA/Play",
title = "Running the Ball More Doesn't Increase Play-Action EPA/Play",
subtitle = "Score differential is between -10 and 10") +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.position = "none"
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
annotate("text", x = 0.75, y = 0.32, label = "R^2 = 0.00", size = 6)
ggsave("titans-4.png", height = 10, width = 16, dpi = "retina")
rsq <- function(x, y) summary(lm(y~x))$r.squared
rsq(pa_games$run_rate, pa_games$pa_epa)
##########################################################################
facet.grades <- facet.grades %>%
left_join(teams_colors_logos, by = c("team" = "team_name"))
corr_grades <- facet.grades %>%
select(pf, pa, over, off, pass, pblk, recv, run, rblk, def, rdef, tack, prsh, cov, spec)
corrgram(corr_grades, order=NULL, lower.panel=panel.shade, upper.panel=NULL, text.panel=panel.txt, main="PFF Grades Correlated")
ggcorr(corr_grades, method = c("everything", "pearson"))
corrplot(cor(corr_grades))
select_grades <- facet.grades %>%
select(team_abbr, pblk, rblk, recv, run)
##########################################################################
lions_o <- play.by.play %>%
filter(offense == "DET") %>%
group_by(offense, season, rps) %>%
summarize(mean_epa = mean(EPA, na.rm = T))
lions_o <- lions_o %>%
pivot_wider(names_from = rps, values_from = mean_epa)
lions_o <- lions_o %>%
left_join(lions_proe)
lions_o <- lions_o %>%
select(season, team_wordmark, P, R, proe)
lions_o <- lions_o %>%
mutate_if(is.numeric, ~round(., 2))
tab_data <- lions_o %>%
select(season, team_wordmark, P, R, proe)
write.csv(tab_data, "tab_data.csv")
tab_data <- read.csv("~/Syracuse Blitz/tab_data.csv")
lions_tab <- tab_data %>%
gt() %>%
text_transform(
locations = cells_body(vars(team_wordmark)),
fn = function(x){
web_image(
url = x,
height = px(30)
)
}
) %>%
cols_label(
season = "Season",
team_wordmark = "",
P = "EPA/Pass",
R = "EPA/Rush",
proe = "PROE") %>%
data_color(
columns = vars(proe),
colors = scales::col_numeric(
palette = c("#af8dc3", "#f7f7f7", "#7fbf7b"),
domain = c(-2, 7)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = vars(P, R)
)
) %>%
tab_header(
title = "The Lions Offensive Breakdown from 2014 to 2020",
subtitle = "Matthew Stafford was never given any running game help"
) %>%
tab_options(
column_labels.background.color = "white",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
table.border.bottom.width = px(3),
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
column_labels.border.bottom.width = px(3),
column_labels.border.bottom.color = "black",
data_row.padding = px(3),
source_notes.font.size = 12,
table.font.size = 16,
heading.align = "middle",
heading.title.font.weight = "bold"
) %>%
opt_table_font(
font = list(
default_fonts()
)
)
gtsave(lions_tab, "lions_tab.png")
##########################################################################
steelers_o <- play.by.play %>%
filter(offense == "PIT") %>%
group_by(offense, season, rps) %>%
summarize(mean_epa = mean(EPA, na.rm = T))
steelers_o <- steelers_o %>%
pivot_wider(names_from = rps, values_from = mean_epa)
steelers_o <- steelers_o %>%
left_join(steelers_proe)
steelers_o <- steelers_o %>%
select(season, team_wordmark, P, R, proe)
steelers_o <- steelers_o %>%
mutate_if(is.numeric, ~round(., 2))
tab_data <- steelers_o %>%
select(season, team_wordmark, P, R, proe)
write.csv(tab_data, "tab_data.csv")
steelers_tab <- tab_data %>%
gt() %>%
text_transform(
locations = cells_body(vars(team_wordmark)),
fn = function(x){
web_image(
url = x,
height = px(30)
)
}
) %>%
cols_label(
season = "Season",
team_wordmark = "",
P = "EPA/Pass",
R = "EPA/Rush",
proe = "PROE") %>%
data_color(
columns = vars(proe),
colors = scales::col_numeric(
palette = c("#af8dc3", "#f7f7f7", "#7fbf7b"),
domain = c(-2, 10)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = vars(P, R)
)
) %>%
tab_header(
title = "The Steelers Offensive Breakdown from 2014 to 2020",
subtitle = "The Steelers have had a passing explosion because of distrust in their run game"
) %>%
tab_options(
column_labels.background.color = "white",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
table.border.bottom.width = px(3),
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
column_labels.border.bottom.width = px(3),
column_labels.border.bottom.color = "black",
data_row.padding = px(3),
source_notes.font.size = 12,
table.font.size = 16,
heading.align = "middle",
heading.title.font.weight = "bold"
) %>%
opt_table_font(
font = list(
default_fonts()
)
)
gtsave(steelers_tab, "steelers_tab.png")
##########################################################################
play.by.play <- play.by.play %>%
mutate(yard_zone = case_when(
yards_to_go > 75 ~ "Deep in own (1-24)",
yards_to_go <= 75 & yards_to_go >= 41 ~ "Touchback to FG (25-Opponent 41)",
yards_to_go <= 40 & yards_to_go > 20 ~ "Common FG to Redzone (40-21)",
yards_to_go <= 20 & yards_to_go > 10 ~ "Start to Middle of Redzone (20-11)",
yards_to_go <= 10 ~ "Middle of Redzone to Goalline (10-1)"
))
unique(play.by.play$yard_zone)
titans_zones <- play.by.play %>%
filter(!is.na(yard_zone)) %>%
filter(offense == "TEN") %>%
group_by(offense, season, rps, yard_zone) %>%
summarize(plays = n(),
mean_epa = mean(EPA))
write.csv(titans_zones, "titans_zones.csv")
ggplot(titans_zones, aes(fill=Type, y=mean_epa, x=season)) +
geom_bar(position="dodge", stat="identity") +
scale_fill_viridis(discrete = T, option = "E") +
facet_wrap(~yard_zone) +
theme_bw() +
labs(x = "Year",
y = "EPA/Play",
title = "The Titans Passing Vs. Running in Each Yardage Zone",
subtitle = "") +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 8),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 6))
##########################################################################
steelers_zones <- play.by.play %>%
filter(!is.na(yard_zone)) %>%
filter(offense == "PIT") %>%
group_by(offense, season, rps, yard_zone) %>%
summarize(plays = n(),
mean_epa = mean(EPA))
colnames(steelers_zones)[which(names(steelers_zones) == "rps")] <- "Type"
write.csv(steelers_zones, "steelers_zones.csv")
ggplot(steelers_zones, aes(fill=Type, y=mean_epa, x=season)) +
geom_bar(position="dodge", stat="identity") +
scale_fill_viridis(discrete = T, option = "E") +
facet_wrap(~yard_zone) +
theme_bw() +
labs(x = "Year",
y = "EPA/Play",
title = "The Steelers Passing Vs. Running in Each Yardage Zone",
subtitle = "") +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 8),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 6))
#################################################################
pbp_filter <- pbp_rp %>%
filter(score_differential > -11)
pbp_filter <- pbp_filter %>%
filter(score_differential < 11)
off_20 <- pbp_filter %>%
filter(season == 2020) %>%
group_by(posteam) %>%
summarize(off_epa = mean(epa),
pass_rate = mean(pass))
off_20 <- off_20 %>%
left_join(teams_colors_logos, by = c('posteam' = 'team_abbr'))
off_20 %>%
ggplot(aes(x = pass_rate, y = off_epa)) +
geom_hline(yintercept = mean(off_20$off_epa), color = "blue", linetype = "dashed", alpha=0.5) +
geom_vline(xintercept = mean(off_20$pass_rate), color = "blue", linetype = "dashed", alpha=0.5) +
geom_image(aes(image = team_logo_espn), size = 0.05, asp = 16 / 9) +
stat_smooth(geom='line', alpha=0.5, se=FALSE, method='lm')+
labs(x = "Pass Rate",
y = "Offensive EPA/Play",
title = "Each Team's Offensive EPA/Play and Pass Rate",
subtitle = "Score differential between -10 and 10") +
theme_bw() +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 8),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10))
ggsave("pass-rate-epa.png", height = 10, width = 16, dpi = "retina")
off_20_high <- off_20 %>%
filter(off_epa > 0)
off_20_high %>%
ggplot(aes(x = pass_rate, y = off_epa)) +
geom_hline(yintercept = mean(off_20_high$off_epa), color = "blue", linetype = "dashed", alpha=0.5) +
geom_vline(xintercept = mean(off_20_high$pass_rate), color = "blue", linetype = "dashed", alpha=0.5) +
geom_image(aes(image = team_logo_espn), size = 0.05, asp = 16 / 9) +
stat_smooth(geom='line', alpha=0.5, se=FALSE, method='lm')+
labs(x = "Pass Rate",
y = "Offensive EPA/Play",
title = "Each Team's Offensive EPA/Play and Pass Rate",
subtitle = "Score differential between -10 and 10") +
theme_bw() +
theme(
aspect.ratio = 9 / 16,
plot.title = element_text(size = 22, hjust = 0.5, face = "bold"),
plot.subtitle = element_text(size = 16, hjust = 0.5),
axis.text = element_text(size = 8),
axis.title = element_text(size = 14)
) +
scale_y_continuous(breaks = scales::pretty_breaks(n = 10)) +
scale_x_continuous(breaks = scales::pretty_breaks(n = 10))
###############################################################
titans_zones <- play.by.play %>%
filter(!is.na(yard_zone)) %>%
filter(offense == "TEN") %>%
group_by(offense, season, rps, yard_zone) %>%
summarize(plays = n(),
mean_epa = mean(EPA),
sum_epa = sum(EPA))
titans_20_zones <- titans_zones %>%
filter(season == 2020)
write.csv(titans_20_zones, "titans_20_zones2.csv")
teams_colors_logos %>% filter(team_abbr == "TEN") %>% select(team_wordmark)
titans_tab2 <- titans_20_zones2 %>%
gt() %>%
opt_row_striping() %>%
text_transform(
locations = cells_body(vars(offense)),
fn = function(x){
web_image(
url = x,
height = px(30)
)
}
) %>%
cols_label(
offense = "",
season = "Season",
rps = "Play Type",
yard_zone = "Yardage Zone",
plays = "Plays",
sum_epa = "Sum EPA",
optimal_pass = "Optimal %",
actual_pass = "Actual %",
opt_diff = "Difference") %>%
data_color(
columns = vars(opt_diff),
colors = scales::col_numeric(
palette = c("#af8dc3", "#f7f7f7", "#7fbf7b"),
domain = c(-0.5, 0.5)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = vars(opt_diff)
)
) %>%
tab_header(
title = "The Titans Optimal Ratio by Zone",
subtitle = ""
) %>%
tab_options(
column_labels.background.color = "white",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
table.border.bottom.width = px(3),
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
column_labels.border.bottom.width = px(3),
column_labels.border.bottom.color = "black",
data_row.padding = px(3),
source_notes.font.size = 12,
table.font.size = 16,
heading.align = "middle",
heading.title.font.weight = "bold"
) %>%
opt_table_font(
font = list(
default_fonts()
)
)
titans_tab2
gtsave(titans_tab2, "titans_tab2.png")
###############################################################
steelers_zones <- play.by.play %>%
filter(!is.na(yard_zone)) %>%
filter(offense == "PIT") %>%
group_by(offense, season, rps, yard_zone) %>%
summarize(plays = n(),
mean_epa = mean(EPA),
sum_epa = sum(EPA))
steelers_20_zones <- steelers_zones %>%
filter(season == 2020)
write.csv(steelers_20_zones, "steelers_20_zones2.csv")
teams_colors_logos %>% filter(team_abbr == "PIT") %>% select(team_wordmark)
steelrs_tab2 <- steelers_20_zones %>%
gt() %>%
opt_row_striping() %>%
text_transform(
locations = cells_body(vars(offense)),
fn = function(x){
web_image(
url = x,
height = px(30)
)
}
) %>%
cols_label(
offense = "",
season = "Season",
rps = "Play Type",
yard_zone = "Yardage Zone",
plays = "Plays",
sum_epa = "Sum EPA",
optimal_pass = "Optimal %",
actual_pass = "Actual %",
opt_diff = "Difference") %>%
data_color(
columns = vars(opt_diff),
colors = scales::col_numeric(
palette = c("#af8dc3", "#f7f7f7", "#7fbf7b"),
domain = c(-0.5, 0.5)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = vars(opt_diff)
)
) %>%
tab_header(
title = "The Steelers Optimal Ratio by Zone",
subtitle = ""
) %>%
tab_options(
column_labels.background.color = "white",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
table.border.bottom.width = px(3),
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
column_labels.border.bottom.width = px(3),
column_labels.border.bottom.color = "black",
data_row.padding = px(3),
source_notes.font.size = 12,
table.font.size = 16,
heading.align = "middle",
heading.title.font.weight = "bold"
) %>%
opt_table_font(
font = list(
default_fonts()
)
)
###############################################################
nfl_zones <- play.by.play %>%
filter(!is.na(yard_zone)) %>%
group_by(rps, yard_zone) %>%
summarize(plays = n(),
mean_epa = mean(EPA),
sum_epa = sum(EPA))
#nfl_zones <- nfl_zones %>%
# filter(season == 2020)
write.csv(nfl_zones, "nfl_zones2.csv")
nfl_zones2 <- read.csv("~/Syracuse Blitz/nfl_zones2.csv")
nfl_tab <- nfl_zones2 %>%
gt() %>%
opt_row_striping() %>%
text_transform(
locations = cells_body(vars(offense)),
fn = function(x){
web_image(
url = x,
height = px(30)
)
}
) %>%
cols_label(
offense = "",
season = "Season",
rps = "Play Type",
yard_zone = "Yardage Zone",
plays = "Plays",
sum_epa = "Sum EPA",
optimal_pass = "Optimal %",
actual_pass = "Actual %",
opt_diff = "Difference") %>%
data_color(
columns = vars(opt_diff),
colors = scales::col_numeric(
palette = c("#af8dc3", "#f7f7f7", "#7fbf7b"),
domain = c(-0.5, 0.5)
)
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
columns = vars(opt_diff)
)
) %>%
tab_header(
title = "The NFL Optimal Ratio by Zone, 2014-2020",
subtitle = ""
) %>%
tab_options(
column_labels.background.color = "white",
column_labels.font.weight = "bold",
table.border.top.width = px(3),
table.border.top.color = "transparent",
table.border.bottom.color = "transparent",
table.border.bottom.width = px(3),
column_labels.border.top.width = px(3),
column_labels.border.top.color = "transparent",
column_labels.border.bottom.width = px(3),
column_labels.border.bottom.color = "black",
data_row.padding = px(3),
source_notes.font.size = 12,
table.font.size = 16,
heading.align = "middle",
heading.title.font.weight = "bold"
) %>%
opt_table_font(
font = list(
default_fonts()
)
)
nfl_tab
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment