-
-
Save Henryjean/00df8f8f0a19d7fc1df19184edfab321 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load packages | |
library(tidyverse) | |
library(nbastatR) | |
library(extrafont) | |
library(ballr) | |
library(rvest) | |
library(janitor) | |
library(hablar) | |
library(ggforce) | |
library(ggbrace) | |
library(magick) | |
library(ggtext) | |
# Custom theme | |
theme_owen <- function () { | |
theme_minimal(base_size=9, base_family="Consolas") %+replace% | |
theme( | |
panel.grid.minor = element_blank(), | |
plot.background = element_rect(fill = 'floralwhite', color = "floralwhite") | |
) | |
} | |
# Get Basketball Reference team name info (for merging purposes later) | |
bref_tms <- dictionary_bref_teams() | |
bref_tms <- bref_tms %>% filter(seasonLast == 2021) %>% select(nameTeamBREF, slugTeamBREF) | |
bref_tms <- bref_tms %>% | |
mutate(slugTeamBREF = case_when( | |
slugTeamBREF == "NJN" ~ "BRK", | |
slugTeamBREF == "CHA" ~ "CHO", | |
slugTeamBREF == "NOH" ~ "NOP", | |
TRUE ~ slugTeamBREF | |
)) | |
# Get advanced basketabll stats from basketabll reference | |
df_advanced_stats <- NBAPerGameAdvStatistics(season = 2021) | |
# Select each team's highest usasge player (min. 1000 minutes). | |
# Exclude LaMelo Ball since he's injured | |
df_top_usage <- df_advanced_stats %>% | |
filter(mp >= 1000 & tm != "TOT" & player != "LaMelo Ball") %>% | |
group_by(tm) %>% | |
arrange(desc(usgpercent)) %>% | |
slice(1:1) %>% | |
select(player, tm, mp, usgpercent) | |
# Get each team's Net Rating from basketabll-reference | |
url <- "https://www.basketball-reference.com/leagues/NBA_2021.html" | |
bref_tables <- url %>% | |
read_html() %>% | |
html_table() | |
df_tm_net_rating <- bref_tables[[11]] | |
df_tm_net_rating <- df_tm_net_rating %>% | |
row_to_names(row_number = 1) %>% | |
clean_names() | |
df_tm_net_rating <- df_tm_net_rating %>% select(team, n_rtg) | |
# Join with bref_tms to get slug team names | |
df_tm_net_rating <- left_join(df_tm_net_rating, bref_tms, by = c("team" = "nameTeamBREF")) | |
# Get Player On / Off data | |
url <- "https://www.basketball-reference.com/leagues/NBA_2021_play-by-play.html" | |
df_player_on_off <- url %>% | |
read_html() %>% | |
html_table(fill = TRUE) %>% | |
as.data.frame() | |
df_player_on_off <- df_player_on_off %>% | |
row_to_names(row_number = 1) %>% | |
clean_names() | |
df_player_on_off <- df_player_on_off %>% | |
select(player, tm, mp, on_court, on_off) | |
# Merge player on/off with team net ratings | |
df <- left_join(df_player_on_off, df_tm_net_rating, by = c("tm" = "slugTeamBREF")) | |
# Filter out bad data | |
df <- df %>% filter(tm != "Tm" & tm != "TOT") | |
# Change character data to numeric | |
df <- df %>% retype() | |
# Clean up player name | |
df <- df %>% | |
mutate(player = gsub(" Jr.", "", player), | |
player = gsub(" Sr.", "", player), | |
player = gsub(" III", "", player), | |
player = gsub(" II", "", player), | |
player = gsub(" IV", "", player)) | |
# Calcualte a team's net rating when a player is OFF the court | |
df <- df %>% | |
mutate(off_court = case_when( | |
on_court >= 0 & on_off >= 0 ~ on_court - on_off, | |
on_court >= 0 & on_off < 0 ~ on_court + abs(on_off), | |
on_court < 0 & on_off >= 0 ~ on_court - on_off, | |
on_court < 0 & on_off < 0 ~ on_court + abs(on_off), | |
TRUE ~ 0 | |
)) | |
# Comet Plot -------------------------------------------------------------- | |
# Use our dataframe of the top usage player on each team and find out whether | |
# their team is better or wose with them on/off the court | |
df_comet <- df_top_usage %>% | |
left_join(., df, by = c("player", "tm")) %>% | |
mutate(pos_neg = case_when( | |
on_off >= 0 ~ "Better On", | |
TRUE ~ "Worse on", | |
)) | |
# Combine player and slug team names together | |
df_comet$player_tm <- paste0(df_comet$player, " - ", df_comet$tm) | |
comet_plot <- df_comet %>% | |
ggplot() + | |
geom_link(aes(x = off_court, y = fct_reorder(player_tm, on_off), xend = on_court, yend = fct_reorder(player_tm, on_off), color = pos_neg, size = stat(index))) + | |
scale_color_manual(values = c("#00A087FF", "#E64B35FF")) + | |
scale_size(range = c(.01, 4)) + | |
scale_x_continuous(labels = c("-10", "-5", "0", "+5", "+10"), breaks = seq(-10, 10, 5)) + | |
theme_owen() + | |
geom_point( | |
data = filter(df_comet, on_off > 0), | |
aes(on_court, y = fct_reorder(player_tm, on_off), color = pos_neg), | |
shape = 21, | |
fill = "white", | |
size = 3.5 | |
) + | |
geom_point( | |
data = filter(df_comet, on_off < 0), | |
aes(on_court, y = fct_reorder(player_tm, on_off), color = pos_neg), | |
shape = 21, | |
fill = "white", | |
size = 3.5 | |
) + | |
annotate(geom = 'label', x = 7.5, y = 3.5, label = "Team is worse\nwith them On", family = "Consolas", color = "#E64B35FF", fontface = 'bold', fill = "floralwhite", label.size = 0, size = 3) + | |
annotate(geom = 'label', x = -7.5, y = 22.5, label = "Team is better\nwith them On", family = "Consolas", color = "#00A087FF", fontface = 'bold', fill = "floralwhite", label.size = 0, size = 3) + | |
theme(legend.position = 'none', | |
plot.title.position = 'plot', | |
axis.text.y = element_text(size = 6), | |
plot.title = element_text(face = 'bold', size = 15), | |
plot.subtitle = element_text(size = 7), | |
plot.margin = margin(10, 10, 20, 10)) + | |
labs(x = "Net Rating With Player On Or Off The Court", | |
y = "", | |
title = "Net Rating When Player Is On Or Off The Court", | |
subtitle = "Among each team's highest usage player (min. 1000 minutes) | Players sorted by their On/Off Differential") | |
# Save Plot | |
ggsave("Comet_Plot.png", comet_plot, w = 6, h = 6, dpi = 300, type = "cairo") | |
# Make an inset plot that we'll use for our key | |
inset_plot <- df_comet %>% | |
ggplot() + | |
geom_link(aes(x = 0, y = 1, xend = 1, yend = 1, size = stat(index)), color = "#00A087FF") + | |
scale_size(range = c(.00001, 3)) + | |
scale_y_continuous(limits = c(.95, 1.05)) + | |
scale_x_continuous(limits = c(-.2, 1.2)) + | |
theme_owen() + | |
labs(title = "KEY") + | |
coord_cartesian(clip = 'off') + | |
geom_point(aes(x = 1, y = 1), color = "#00A087FF",shape = 21,fill = "white", size = 2.5) + | |
theme(legend.position = 'none', | |
axis.text = element_blank(), | |
axis.title = element_blank(), | |
panel.grid.major = element_blank(), | |
plot.title = element_text(hjust = .5, face = 'bold'), | |
plot.background = element_rect(fill = 'floralwhite', color = "black")) + | |
geom_brace(0.005,1.0175,1.005,1.0175) + | |
annotate(geom = 'text', x = 0.5, y = 1.025, label = "On/Off Differential", family = "Consolas", size = 2.5, hjust = .5, lineheight = 1) + | |
annotate(geom = 'text', x = 0, y = .985, label = "Net Rating\nWith Player OFF", family = "Consolas", size = 1.85, hjust = .5, lineheight = 1) + | |
annotate(geom = 'text', x = 1, y = .985, label = "Net Rating\nWith Player ON", family = "Consolas", size = 1.85, hjust = .5, lineheight = 1) | |
# Save our inset plot -- we'll use this later | |
ggsave("Inset.png", inset_plot, w = 1.5, h = 1.5, dpi = 300, type = "cairo") | |
# Read in Inset plot | |
inset <- image_read("Inset.png") | |
# Read in Comet plot | |
graf <- image_read("Comet_Plot.png") | |
# Layer Inset plot on top of Comet plot | |
image_composite(graf, inset, offset = "+1300+900") %>% image_write("Comet_Plot.png") | |
# Histogram plot ---------------------------------------------------------- | |
team_nrtg <- df %>% select(n_rtg) %>% distinct() | |
on_text <- data.frame(value = NA, y = NA, lab = NA, | |
name = factor("Player On Court",levels = c("Player Off Court","Player On Court"))) | |
off_text <- data.frame(value = NA, y = NA, lab = NA, | |
name = factor("Player Off Court",levels = c("Player Off Court","Player On Court"))) | |
df %>% | |
filter(mp >= 1000) %>% | |
select(player, tm, off_court, on_court) %>% | |
pivot_longer(-c(player, tm)) %>% | |
mutate(name = case_when( | |
name == "on_court" ~ "Player On Court", | |
name == "off_court" ~ "Player Off Court", | |
TRUE ~ "Net Rating" | |
)) %>% | |
ggplot(aes(x = value)) + | |
geom_histogram(alpha = .333, color = '#7570B3FF', fill = '#7570B3FF', bins = 30) + | |
geom_histogram(data = team_nrtg, aes(x = n_rtg, fill = n_rtg), alpha=0.85, color = 'black', fill = '#1B9E77FF', bins = 30) + | |
geom_label(data = on_text, aes(x = 10, y = 21.5), label = "Team\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#1B9E77FF", family = "Consolas", hjust = .5, alpha = .5) + | |
geom_curve(data = on_text, aes(x = 10, y = 20.5, xend = 2.5, yend = 3), color = "#1B9E77FF", curvature = -0.2, arrow = arrow(length = unit(0.03, "npc"))) + | |
geom_label(data = on_text, aes(x = -10, y = 20), label = "Player ON\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#7570B3FF", family = "Consolas", hjust = .5, alpha = .5) + | |
geom_curve(data = on_text, aes(x = -10, y = 19, xend = -3.75, yend = 15), color = "#7570B3FF", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc"))) + | |
geom_label(data = off_text, aes(x = 10, y = 21.5), label = "Team\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#1B9E77FF", family = "Consolas", hjust = .5, alpha = .5) + | |
geom_curve(data = off_text, aes(x = 10, y = 20.5, xend = 2.5, yend = 3), color = "#1B9E77FF", curvature = -0.2, arrow = arrow(length = unit(0.03, "npc"))) + | |
geom_label(data = off_text, aes(x = -14.5, y = 20), label = "Player OFF\nNet Ratings", size = 3, fill = "floralwhite", fontface = 'bold', label.size = 0, color = "#7570B3FF", family = "Consolas", hjust = .5, alpha = .5) + | |
geom_curve(data = off_text, aes(x = -14.5, y = 19, xend = -8, yend = 15), color = "#7570B3FF", curvature = 0.2, arrow = arrow(length = unit(0.03, "npc"))) + | |
facet_wrap(~fct_rev(name), nrow = 1) + | |
theme_owen() + | |
scale_x_continuous(limits = c(-20, 20), breaks = seq(-20, 20, 10), labels = c("-20", "-10", "0", "+10", "+20")) + | |
theme(plot.title = element_markdown(size = 11, face = 'bold'), | |
plot.title.position = 'plot', | |
strip.text = element_text(size = 10), | |
panel.spacing.x = unit(2, 'lines'), | |
plot.margin = margin(10, 10, 20, 10), | |
plot.caption = element_text(size = 6.5, vjust = -3, hjust = 1.03)) + | |
labs(x = "Net Rating", | |
y = "Count", | |
title = "Distribution Of <span style='color:#7570B3FF'>**Individual Player Net Ratings**</span> vs. <span style='color:#1B9E77FF'>**Team Net Ratings**</span>", | |
subtitle = "Minimum 1000 minutes played") | |
ggsave("Player_vs_Team_Net_Ratings.png", w = 6, h = 6, dpi = 300, type = 'cairo') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment