Skip to content

Instantly share code, notes, and snippets.

@Henryjean
Last active June 18, 2021 14:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Henryjean/00df8f8f0a19d7fc1df19184edfab321 to your computer and use it in GitHub Desktop.
Save Henryjean/00df8f8f0a19d7fc1df19184edfab321 to your computer and use it in GitHub Desktop.
# Load packages
library(tidyverse)
library(nbastatR)
library(extrafont)
library(ballr)
library(rvest)
library(janitor)
library(hablar)
library(ggforce)
library(ggbrace)
library(magick)
library(ggtext)
# Custom theme
theme_owen <- function () {
theme_minimal(base_size=9, base_family="Consolas") %+replace%
theme(
panel.grid.minor = element_blank(),
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite")
)
}
# Get Basketball Reference team name info (for merging purposes later)
bref_tms <- dictionary_bref_teams()
bref_tms <- bref_tms %>% filter(seasonLast == 2021) %>% select(nameTeamBREF, slugTeamBREF)
bref_tms <- bref_tms %>%
mutate(slugTeamBREF = case_when(
slugTeamBREF == "NJN" ~ "BRK",
slugTeamBREF == "CHA" ~ "CHO",
slugTeamBREF == "NOH" ~ "NOP",
TRUE ~ slugTeamBREF
))
# Get advanced basketabll stats from basketabll reference
df_advanced_stats <- NBAPerGameAdvStatistics(season = 2021)
# Select each team's highest usasge player (min. 1000 minutes).
# Exclude LaMelo Ball since he's injured
df_top_usage <- df_advanced_stats %>%
filter(mp >= 1000 & tm != "TOT" & player != "LaMelo Ball") %>%
group_by(tm) %>%
arrange(desc(usgpercent)) %>%
slice(1:1) %>%
select(player, tm, mp, usgpercent)
# Get each team's Net Rating from basketabll-reference
url <- "https://www.basketball-reference.com/leagues/NBA_2021.html"
bref_tables <- url %>%
read_html() %>%
html_table()
df_tm_net_rating <- bref_tables[[11]]
df_tm_net_rating <- df_tm_net_rating %>%
row_to_names(row_number = 1) %>%
clean_names()
df_tm_net_rating <- df_tm_net_rating %>% select(team, n_rtg)
# Join with bref_tms to get slug team names
df_tm_net_rating <- left_join(df_tm_net_rating, bref_tms, by = c("team" = "nameTeamBREF"))
# Get Player On / Off data
url <- "https://www.basketball-reference.com/leagues/NBA_2021_play-by-play.html"
df_player_on_off <- url %>%
read_html() %>%
html_table(fill = TRUE) %>%
as.data.frame()
df_player_on_off <- df_player_on_off %>%
row_to_names(row_number = 1) %>%
clean_names()
df_player_on_off <- df_player_on_off %>%
select(player, tm, mp, on_court, on_off)
# Merge player on/off with team net ratings
df <- left_join(df_player_on_off, df_tm_net_rating, by = c("tm" = "slugTeamBREF"))
# Filter out bad data
df <- df %>% filter(tm != "Tm" & tm != "TOT")
# Change character data to numeric
df <- df %>% retype()
# Clean up player name
df <- df %>%
mutate(player = gsub(" Jr.", "", player),
player = gsub(" Sr.", "", player),
player = gsub(" III", "", player),
player = gsub(" II", "", player),
player = gsub(" IV", "", player))
# Calcualte a team's net rating when a player is OFF the court
df <- df %>%
mutate(off_court = case_when(
on_court >= 0 & on_off >= 0 ~ on_court - on_off,
on_court >= 0 & on_off < 0 ~ on_court + abs(on_off),
on_court < 0 & on_off >= 0 ~ on_court - on_off,
on_court < 0 & on_off < 0 ~ on_court + abs(on_off),
TRUE ~ 0
))
# Comet Plot --------------------------------------------------------------
# Use our dataframe of the top usage player on each team and find out whether
# their team is better or wose with them on/off the court
df_comet <- df_top_usage %>%
left_join(., df, by = c("player", "tm")) %>%
mutate(pos_neg = case_when(
on_off >= 0 ~ "Better On",
TRUE ~ "Worse on",
))
# Combine player and slug team names together
df_comet$player_tm <- paste0(df_comet$player, " - ", df_comet$tm)
comet_plot <- df_comet %>%
ggplot() +
geom_link(aes(x = off_court, y = fct_reorder(player_tm, on_off), xend = on_court, yend = fct_reorder(player_tm, on_off), color = pos_neg, size = stat(index))) +
scale_color_manual(values = c("#00A087FF", "#E64B35FF")) +
scale_size(range = c(.01, 4)) +
scale_x_continuous(labels = c("-10", "-5", "0", "+5", "+10"), breaks = seq(-10, 10, 5)) +
theme_owen() +
geom_point(
data = filter(df_comet, on_off > 0),
aes(on_court, y = fct_reorder(player_tm, on_off), color = pos_neg),
shape = 21,
fill = "white",
size = 3.5
) +
geom_point(
data = filter(df_comet, on_off < 0),
aes(on_court, y = fct_reorder(player_tm, on_off), color = pos_neg),
shape = 21,
fill = "white",
size = 3.5
) +
annotate(geom = 'label', x = 7.5, y = 3.5, label = "Team is worse\nwith them On", family = "Consolas", color = "#E64B35FF", fontface = 'bold', fill = "floralwhite", label.size = 0, size = 3) +
annotate(geom = 'label', x = -7.5, y = 22.5, label = "Team is better\nwith them On", family = "Consolas", color = "#00A087FF", fontface = 'bold', fill = "floralwhite", label.size = 0, size = 3) +
theme(legend.position = 'none',
plot.title.position = 'plot',
axis.text.y = element_text(size = 6),
plot.title = element_text(face = 'bold', size = 15),
plot.subtitle = element_text(size = 7),
plot.margin = margin(10, 10, 20, 10)) +
labs(x = "Net Rating With Player On Or Off The Court",
y = "",
title = "Net Rating When Player Is On Or Off The Court",
subtitle = "Among each team's highest usage player (min. 1000 minutes) | Players sorted by their On/Off Differential")
# Save Plot
ggsave("Comet_Plot.png", comet_plot, w = 6, h = 6, dpi = 300, type = "cairo")
# Make an inset plot that we'll use for our key
inset_plot <- df_comet %>%
ggplot() +
geom_link(aes(x = 0, y = 1, xend = 1, yend = 1, size = stat(index)), color = "#00A087FF") +
scale_size(range = c(.00001, 3)) +
scale_y_continuous(limits = c(.95, 1.05)) +
scale_x_continuous(limits = c(-.2, 1.2)) +
theme_owen() +
labs(title = "KEY") +
coord_cartesian(clip = 'off') +
geom_point(aes(x = 1, y = 1), color = "#00A087FF",shape = 21,fill = "white", size = 2.5) +
theme(legend.position = 'none',
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid.major = element_blank(),
plot.title = element_text(hjust = .5, face = 'bold'),
plot.background = element_rect(fill = 'floralwhite', color = "black")) +
geom_brace(0.005,1.0175,1.005,1.0175) +
annotate(geom = 'text', x = 0.5, y = 1.025, label = "On/Off Differential", family = "Consolas", size = 2.5, hjust = .5, lineheight = 1) +
annotate(geom = 'text', x = 0, y = .985, label = "Net Rating\nWith Player OFF", family = "Consolas", size = 1.85, hjust = .5, lineheight = 1) +
annotate(geom = 'text', x = 1, y = .985, label = "Net Rating\nWith Player ON", family = "Consolas", size = 1.85, hjust = .5, lineheight = 1)
# Save our inset plot -- we'll use this later
ggsave("Inset.png", inset_plot, w = 1.5, h = 1.5, dpi = 300, type = "cairo")
# Read in Inset plot
inset <- image_read("Inset.png")
# Read in Comet plot
graf <- image_read("Comet_Plot.png")
# Layer Inset plot on top of Comet plot
image_composite(graf, inset, offset = "+1300+900") %>% image_write("Comet_Plot.png")
# Histogram plot ----------------------------------------------------------
team_nrtg <- df %>% select(n_rtg) %>% distinct()
on_text <- data.frame(value = NA, y = NA, lab = NA,
name = factor("Player On Court",levels = c("Player Off Court","Player On Court")))
off_text <- data.frame(value = NA, y = NA, lab = NA,
name = factor("Player Off Court",levels = c("Player Off Court","Player On Court")))
df %>%
filter(mp >= 1000) %>%
select(player, tm, off_court, on_court) %>%
pivot_longer(-c(player, tm)) %>%
mutate(name = case_when(
name == "on_court" ~ "Player On Court",
name == "off_court" ~ "Player Off Court",
TRUE ~ "Net Rating"
)) %>%
ggplot(aes(x = value)) +
geom_histogram(alpha = .333, color = '#7570B3FF', fill = '#7570B3FF', bins = 30) +
geom_histogram(data = team_nrtg, aes(x = n_rtg, fill = n_rtg), alpha=0.85, color = 'black', fill = '#1B9E77FF', bins = 30) +
geom_label(data = on_text, aes(x = 10, y = 21.5), label = "Team\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#1B9E77FF", family = "Consolas", hjust = .5, alpha = .5) +
geom_curve(data = on_text, aes(x = 10, y = 20.5, xend = 2.5, yend = 3), color = "#1B9E77FF", curvature = -0.2, arrow = arrow(length = unit(0.03, "npc"))) +
geom_label(data = on_text, aes(x = -10, y = 20), label = "Player ON\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#7570B3FF", family = "Consolas", hjust = .5, alpha = .5) +
geom_curve(data = on_text, aes(x = -10, y = 19, xend = -3.75, yend = 15), color = "#7570B3FF", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc"))) +
geom_label(data = off_text, aes(x = 10, y = 21.5), label = "Team\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#1B9E77FF", family = "Consolas", hjust = .5, alpha = .5) +
geom_curve(data = off_text, aes(x = 10, y = 20.5, xend = 2.5, yend = 3), color = "#1B9E77FF", curvature = -0.2, arrow = arrow(length = unit(0.03, "npc"))) +
geom_label(data = off_text, aes(x = -14.5, y = 20), label = "Player OFF\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#7570B3FF", family = "Consolas", hjust = .5, alpha = .5) +
geom_curve(data = off_text, aes(x = -14.5, y = 19, xend = -8, yend = 15), color = "#7570B3FF", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc"))) +
facet_wrap(~fct_rev(name), nrow = 1) +
theme_owen() +
scale_x_continuous(limits = c(-20, 20), breaks = seq(-20, 20, 10), labels = c("-20", "-10", "0", "+10", "+20")) +
theme(plot.title = element_markdown(size = 11, face = 'bold'),
plot.title.position = 'plot',
strip.text = element_text(size = 10),
panel.spacing.x = unit(2, 'lines'),
plot.margin = margin(10, 10, 20, 10),
plot.caption = element_text(size = 6.5, vjust = -3, hjust = 1.03)) +
labs(x = "Net Rating",
y = "Count",
title = "Distribution Of <span style='color:#7570B3FF'>**Individual Player Net Ratings**</span> vs. <span style='color:#1B9E77FF'>**Team Net Ratings**</span>",
subtitle = "Minimum 1000 minutes played")
ggsave("Player_vs_Team_Net_Ratings.png", w = 6, h = 6, dpi = 300, type = 'cairo')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment