Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save benjaminrobinson/b8f796433a79908897c03178f545d73f to your computer and use it in GitHub Desktop.
Save benjaminrobinson/b8f796433a79908897c03178f545d73f to your computer and use it in GitHub Desktop.
Exploring the Relationship Between Regular Season and Postseason Success
library(tidyverse)
library(gghighlight)
library(ggthemes)
# Load game-level results and tag every game with its season type and outcome.
#
# Rows without a final score on both sides are dropped first. Otherwise the
# score comparisons below evaluate to NA, the NA winner propagates into the
# per-team win flag, and the later summary counts such rows both as games
# played and as ties, which deflates the winning percentage.
# NOTE(review): presumably these are scheduled-but-unplayed games in the feed;
# confirm against the data source.
tmp <- read_csv("https://github.com/leesharpe/nfldata/raw/master/data/games.csv") %>%
  select(
    season,
    game_type,
    game_id,
    home_score,
    away_score,
    home_team,
    away_team
  ) %>%
  filter(!is.na(home_score), !is.na(away_score)) %>%
  mutate(
    # Anything that is not a regular-season game is treated as postseason.
    season_type = ifelse(game_type == "REG", "Regular", "Post"),
    # Equal scores fall through to the "Tie" sentinel for both columns.
    winner = case_when(
      home_score > away_score ~ home_team,
      away_score > home_score ~ away_team,
      TRUE ~ "Tie"
    ),
    loser = case_when(
      home_score > away_score ~ away_team,
      away_score > home_score ~ home_team,
      TRUE ~ "Tie"
    )
  )
# Map older team abbreviations onto the single code used for the rest of the
# analysis. Values that are not listed (including the "Tie" sentinel and NA)
# pass through unchanged.
normalize_team <- function(abbr) {
  case_when(
    abbr %in% c("LA", "STL") ~ "LAR",
    abbr == "SD" ~ "LAC",
    abbr == "OAK" ~ "LV",
    abbr == "WAS" ~ "WFT",
    TRUE ~ abbr
  )
}

# Build one row per team per game (home rows first, then away rows), flag
# wins, and reduce to one row per team holding its winning percentage in the
# regular season (`Regular`) and the postseason (`Post`).
dat <- bind_rows(
  tmp %>% select(season, season_type, game_id, team = home_team, winner),
  tmp %>% select(season, season_type, game_id, team = away_team, winner)
) %>%
  mutate(
    # Both sides of the comparison must go through the same mapping. If only
    # `team` is remapped (e.g. "OAK" -> "LV") while `winner` keeps the old
    # code, `team == winner` is FALSE and a genuine win is scored as a loss.
    team = normalize_team(team),
    winner = normalize_team(winner),
    # 1 = win, 0 = loss, NA = tie.
    w = ifelse(team == winner, 1, ifelse(winner == "Tie", NA, 0))
  ) %>%
  group_by(season_type, team) %>%
  summarize(
    n = n(),
    wins = sum(w, na.rm = TRUE),
    ties = sum(is.na(w)),
    losses = n - wins - ties,
    # Ties stay in the denominator and count as non-wins.
    win_perc = wins / n,
    losing_perc = losses / n,
    .groups = "drop"
  ) %>%
  # Keep only the winning percentage and spread the two season types into
  # columns; a team with no games of a given type gets NA in that column.
  select(team, season_type, win_perc) %>%
  pivot_wider(names_from = season_type, values_from = win_perc) %>%
  arrange(team)
# Scatter plot of each team's regular-season (x) versus postseason (y) winning
# percentage. The dashed red diagonal marks equal percentages in both, and the
# CIN point is highlighted with a direct label.
ggplot(data = dat, mapping = aes(x = Regular, y = Post, group = team)) +
  geom_point(size = 3) +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = 2) +
  # gghighlight acts on the layers added before it, so it follows both geoms.
  gghighlight(team == "CIN", use_direct_label = TRUE) +
  # Both axes span the full 0-100% range so the diagonal is a true 45 degrees.
  scale_x_continuous(limits = c(0, 1), labels = scales::percent) +
  scale_y_continuous(limits = c(0, 1), labels = scales::percent) +
  labs(
    title = "Testing a Hypothesis",
    subtitle = "Regular vs Postseason Success (1999 - 2020)",
    x = "Regular Season Winning Percentage",
    y = "Postseason Winning Percentage",
    caption = "Chart by: Benjamin Robinson (@benj_robinson) | Source: NFLGameData.com, @LeeSharpeNFL"
  ) +
  theme_fivethirtyeight()
# Save the most recently built plot as an 8 x 8 inch PNG at 96 dpi in the
# current working directory.
ggsave(
  filename = "REG_POST_WINNING_PERCENTAGE_RELATIONSHIP.png",
  plot = last_plot(),
  width = 8, height = 8, units = "in",
  dpi = 96
)
# Regress postseason winning percentage on regular-season winning percentage.
# The model is fitted once and reused for both the summary and the
# predictions.
fit <- lm(Post ~ Regular, data = dat)
summary(fit)

# Pearson correlation between the two percentages. Only complete pairs are
# used, so a team with a missing value in either column does not turn the
# whole result into NA.
cor(dat$Regular, dat$Post, use = "complete.obs")

# Passing `newdata = dat` returns one prediction per row of `dat`, including
# rows lm() dropped for missing values, so the assignment always lines up.
dat$pred <- predict(fit, newdata = dat)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment