Skip to content

Instantly share code, notes, and snippets.

@jthomasmock
Last active May 16, 2021 22:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jthomasmock/7d489f04e53812eacca8b94b6c8ee84a to your computer and use it in GitHub Desktop.
Save jthomasmock/7d489f04e53812eacca8b94b6c8ee84a to your computer and use it in GitHub Desktop.
library(rvest)
library(tidyverse)
library(ggrepel)
library(glue)
library(lubridate)
library(gt)
library(ggforce)
url <- "https://www.pro-football-reference.com/teams/comebacks.htm"
list_of_teams <- c("atl", "buf", "car", "chi", "cin", "cle", "clt", "crd", "dal", "den", "det",
"gnb", "htx", "jax", "kan", "mia", "min", "nor", "nwe", "nyg", "nyj",
"oti", "phi", "pit", "rai", "ram", "rav", "sdg", 'sea', "sfo", "tam", "was")
scrape_comeback <- function(team){
message(paste("Scraping", team))
Sys.sleep(5)
url <- glue::glue("https://www.pro-football-reference.com/teams/{team}/comebacks.htm")
url %>%
read_html() %>%
html_table() %>%
.[[1]] %>%
janitor::clean_names() %>%
as_tibble()
}
chiefs_score <- tibble(
tm = "Kansas City Chiefs",
x = "",
opp = "Houston Texans",
date = "Jan 12, 2020",
pf = 51,
pa = 31,
deficit = 24,
margin = 20
)
all_comebacks <- list_of_teams %>%
map_dfr(scrape_comeback)
plot_df <- all_comebacks %>%
mutate_at(vars(pf, pa, deficit), as.numeric) %>%
rowwise() %>%
mutate(margin = pf - pa) %>%
bind_rows(chiefs_score) %>%
mutate(swing = margin + deficit) %>%
mutate(date = str_remove(date, "\\*"),
date_iso = readr::parse_date(date, format = "%b %d, %Y"),
year = lubridate::year(date_iso)) %>%
arrange(desc(swing)) %>%
filter(year >= 1968)
margin_plot <- plot_df %>%
ggplot(aes(x = deficit, y= margin)) +
annotate('segment',
xend = 24, x = 24, y = 0, yend = 20, color = "red", size = 3, alpha = 0.2) +
geom_point(alpha = 0.5, aes(color = if_else(deficit >= 24 & margin >= 20, "red", "black")),
size = 3) +
# geom_text(x = 24, y = 20, label = "(24,20)",vjust = -1) +
geom_mark_circle(data = filter(plot_df, year == 2020, deficit == 24),
aes(label = paste(tm, 2020, sep = " - ")),
color = "red", label.colour = "red", con.colour = "red",
label.fill = NA) +
geom_hline(yintercept = 0) +
scale_color_identity() +
theme_minimal() +
labs(caption = "\nPlot: @thomas_mock | Data: ProFootballReference",
x = "\nLargest Deficit",
y = "Margin of Victory\n",
title = "KC's comeback from 24 points down was historic",
subtitle = "Teams with deficit of 24 points or more & margin of victory of 20 or more in red") +
theme(axis.title = element_text(face = "bold"),
panel.grid.minor = element_blank(),
plot.title = element_text(size = 20, face = "bold"),
plot.subtitle = element_text(size = 16),
axis.text = element_text(size = 16),
axis.title.x = element_text(size = 20),
axis.title.y = element_text(size = 20)) +
scale_x_continuous(breaks = seq(5, 30, 5)) +
scale_y_continuous(breaks = seq(0, 30, 5))
margin_plot
ggsave("margin_plot.png", margin_plot, height = 10, width = 10, units = "in", dpi = "retina")
plot_df
gt_table <- plot_df %>%
filter(deficit >= 24) %>%
select(tm, opp, date_iso, pf:swing) %>%
gt::gt() %>%
cols_merge(vars(pf),
vars(pa),
pattern = "{1}-{2}") %>%
cols_label(tm = "Team", opp = "Opp", date_iso = "Date",
margin = "Margin", swing = "Swing", pf = "Score", deficit = "Deficit") %>%
data_color(
columns = vars(margin),
colors = scales::col_numeric(
palette = c("#F8F8F8","#30a2da"),
domain = NULL
)
) %>%
data_color(
columns = vars(swing),
colors = scales::col_numeric(
palette = c("#F8F8F8","#30a2da"),
domain = NULL
)
) %>%
tab_style(
style = cell_text(
weight = "bold"
),
locations = cells_data(
rows = tm == "Kansas City Chiefs"
)
) %>%
tab_header(title = "Kansas City's comeback was historic",
subtitle = "The largest swing and margin of victory ever for a deficit of 24 points or more") %>%
tab_source_note("Table: @thomas_mock | Data: Pro Football Reference") %>%
tab_footnote(footnote = "Margin = margin of victory",
locations = cells_column_labels(
columns = vars(margin)
)
) %>%
tab_footnote(footnote = "Deficit = largest deficit at any point in the game",
locations = cells_column_labels(
columns = vars(deficit)
)
) %>%
tab_footnote(footnote = "Swing = margin of victory + deficit",
locations = cells_column_labels(
columns = vars(swing)
)
)
gt_table
gtsave(gt_table, "kc_table.png")
kc_bar <- plot_df %>%
filter(deficit >= 24) %>%
mutate(tm_year = paste(tm, as.character(year), sep = "-"),
tm_year = fct_reorder(tm_year, swing)) %>%
select(tm_year, pf:swing) %>%
ggplot(aes(x = fct_reorder(tm_year, swing), y = swing)) +
geom_col(aes(fill = if_else(tm_year == "Kansas City Chiefs-2020", "red", "grey"))) +
geom_text(aes(label = margin), hjust = 1.5, color = "white", fontface = "bold", size = 6) +
geom_hline(yintercept = 0, size = 1, color = "black") +
scale_fill_identity() +
coord_flip() +
theme_minimal() +
theme(panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
axis.text = element_text(face = "bold", size = 14, color = "black"),
axis.title = element_text(size = 16),
plot.title = element_text(face = "bold", size = 20)) +
labs(x = "",
y = "\nPoint Swing",
title = "Kansas City's comeback from 24 points down was historic",
subtitle = "Point Swing = Deficit + Margin of Victory\nInset number = Margin of Victory",
caption = "\nPlot: @thomas_mock | Data: Pro Football Reference")
kc_bar
ggsave("kc_bar.png", kc_bar, height = 12, width = 14, units = "in", dpi = "retina")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment