Skip to content

Instantly share code, notes, and snippets.

@friscojosh
Last active May 16, 2021 22:38
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save friscojosh/78bcfce99bd976ec91e8b4840f99eacd to your computer and use it in GitHub Desktop.
Save friscojosh/78bcfce99bd976ec91e8b4840f99eacd to your computer and use it in GitHub Desktop.
Defense doesn't matter
###########################################
### Testing whether YPA allowed and
### AYA allowed by a defense matters when
### projecting an offense's YPA or AYA
### coming into a particular week.
###########################################
### SQL used to generate the data from nfldb by burntsushi
### nflplay, a dependency is maintained seperately
# DROP TABLE IF EXISTS __defense_study;
# CREATE TABLE __defense_study AS
# SELECT a.gsis_id,
# c.season_year as season,
# c.week, a.passing_att,
# a.passing_cmp_air_yds,
# a.passing_incmp_air_yds,
# a.receiving_yac_yds,
# a.receiving_yds,
# a.receiving_rec,
# a.receiving_tds,
# a.rushing_yds,
# a.rushing_att,
# a.rushing_tds,
#
# CASE
# WHEN b.pos_team = c.home_team THEN c.away_team
# ELSE c.home_team
# END AS defense,
# CASE
# WHEN b.pos_team = c.home_team THEN c.home_team
# ELSE c.away_team
# END AS opponent
# FROM agg_play a
# INNER JOIN drive b ON a.drive_id = b.drive_id AND a.gsis_id = b.gsis_id
# INNER JOIN game c ON a.gsis_id = c.gsis_id
# WHERE c.season_type = 'Regular';
library(tidyverse)
library(broom)
library(knitr)
### Read in the data - available here: https://www.dropbox.com/s/y2nmzg5kjmh2kmi/__defense_study.csv?dl=0
defense <- read_csv("data/__defense_study.csv")
### We'll need this later
results <- data.frame()
### A for loop to own the libs
for (week_to_split in 3:16) {
print(paste("Testing week", week_to_split))
defense_no_2018 <- defense %>%
filter(season != 2018)
first_half_def <- defense_no_2018 %>%
filter(week <= week_to_split) %>%
filter(passing_att == 1) %>%
mutate(airyards = passing_cmp_air_yds + passing_incmp_air_yds,
pacr = ifelse(airyards == 0, receiving_yds, receiving_yds / airyards),
aya = round((receiving_yds + 20 * receiving_tds) / passing_att, 2)) %>%
group_by(defense, season) %>%
summarize(ypa = mean(receiving_yds),
aya = mean(aya))
first_half_off <- defense_no_2018 %>%
filter(week <= week_to_split) %>%
filter(passing_att == 1) %>%
mutate(airyards = passing_cmp_air_yds + passing_incmp_air_yds,
pacr = ifelse(airyards == 0, receiving_yds, receiving_yds / airyards),
aya = round((receiving_yds + 20 * receiving_tds) / passing_att, 2)) %>%
group_by(opponent, season) %>%
summarize(ypa = mean(receiving_yds),
aya = mean(aya))
week_to_test <- defense_no_2018 %>%
filter(week == week_to_split + 1) %>%
filter(passing_att == 1) %>%
mutate(airyards = passing_cmp_air_yds + passing_incmp_air_yds,
pacr = ifelse(airyards == 0, receiving_yds, receiving_yds / airyards),
aya = round((receiving_yds + 20 * receiving_tds) / passing_att, 2)) %>%
group_by(opponent, defense, season) %>%
summarize(ypa = mean(receiving_yds),
aya = mean(aya)) %>%
ungroup()
colnames(week_to_test) <- c("offense", "defense", "season", "test_ypa", "test_aya")
colnames(first_half_off) <- c("offense", "season", "off_ypa", "off_aya")
colnames(first_half_def) <- c("defense", "season", "def_ypa", "def_aya")
first_half_on_week <- first_half_off %>%
left_join(week_to_test, by = c("offense", "season")) %>%
na.omit() %>%
left_join(first_half_def, by = c("defense", "season"))
model_ypa_and_ypa_allowed <- lm(data = first_half_on_week, test_aya ~ off_aya + def_aya)
model_ypa_only <- lm(data = first_half_on_week, test_aya ~ off_aya)
model_ypa_allowed_only <- lm(data = first_half_on_week, test_aya ~ def_aya)
### glance() is a cool helper function that tidys up model summary info
both_res <- glance(model_ypa_and_ypa_allowed) %>%
mutate(model = "both",
week = week_to_split)
offense_res <- glance(model_ypa_only) %>%
mutate(model = "offense",
week = week_to_split)
defense_res <- glance(model_ypa_allowed_only) %>%
mutate(model = "defense",
week = week_to_split)
### bind it all up with twine
grouped <- both_res %>%
bind_rows(offense_res) %>%
bind_rows(defense_res)
### stuff it in our dataframe we created earlier
results <- rbind(results, grouped)
}
### plot the data for great visulaization
results %>% ggplot(aes(x = week, y = adj.r.squared, group = model, color = model)) +
geom_smooth(span = .3, se = FALSE)+
geom_point() +
theme_minimal() +
labs(title = "Adjusted r-squared of Offensive AYA, Defensive AYA and Both as Predictors of Week N AYA")
### munge the data for great clarity
results %>%
spread(key = model, value = adj.r.squared) %>%
select(week, both, defense, offense, p.value) %>%
mutate(both = ifelse(is.na(both), 0, both),
offense = ifelse(is.na(offense), 0, offense),
defense = ifelse(is.na(defense), 0, defense)) %>%
group_by(week) %>%
summarize(both_adj_r_sq = round(max(both), 3),
offense_adj_r_sq = round(max(offense), 3),
defense_adj_r_sq = round(max(defense), 3)) %>%
kable()
### what percentage of the week are each predictor variable significant at 0.05?
results %>%
mutate(significant = ifelse(p.value <= 0.05, 1, 0)) %>%
group_by(model) %>%
summarize(`significant?` = round(mean(significant), 2)) %>%
kable()
@jconnolly21
Copy link

Hey Josh -- I'm curious why you calculated AYA (lines 62, 72, 82) without including interceptions. I thought the calculation for AYA was (Passing Yards + (20 * Passing TDs) - (45 * Interceptions))/(Passing Attempts).

@friscojosh
Copy link
Author

I think I mention it in the tweet where I posted this, but it was deliberate. I wanted to filter out interceptions and just look at positive passing performance. INTs are very noisy and rare, and I wanted to give defense the best shot for stability possible.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment