Skip to content

Instantly share code, notes, and snippets.

@schochastics
Created April 10, 2019 17:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save schochastics/4073062af40c3a507772394a8d802efa to your computer and use it in GitHub Desktop.
Save schochastics/4073062af40c3a507772394a8d802efa to your computer and use it in GitHub Desktop.
Barchart Race of Top NBA Scorers
library(rvest)
library(tidyverse)
library(gganimate)
library(extrafont)
loadfonts()
get_season <- function(x){
url <- paste0("https://www.basketball-reference.com/leagues/NBA_",x,"_totals.html")
url %>%
read_html() %>%
html_table() %>%
.[[1]] %>%
dplyr::filter(Rk!="Rk") %>%
select(Player,Tm,Age,TRB,PTS) %>%
mutate(year=x)
}
if(!file.exists("player_stats.csv")){
player_tbl <- map_dfr(1955:2019,get_season)
write_csv(player_tbl,"player_stats.csv")
} else{
player_tbl <- read_csv("player_stats.csv")
}
# get colors ----
nba_teams <- "https://en.wikipedia.org/wiki/Wikipedia:WikiProject_National_Basketball_Association/National_Basketball_Association_team_abbreviations" %>%
read_html %>%
html_table(header = T) %>%
.[[1]] %>%
rename(Tm=`Abbreviation/Acronym`)
nba_colors <- tibble::tribble(
~Franchise, ~primary, ~secondary, ~tertiary,
"Atlanta Hawks", "#e13a3e", "#c4d600", "#061922",
"Boston Celtics", "#008348", "#bb9753", "#a73832",
"Brooklyn Nets", "#061922", NA, NA,
"Charlotte Hornets", "#1d1160", "#008ca8", "#a1a1a4",
"Chicago Bulls", "#ce1141", "#061922", NA,
"Cleveland Cavaliers", "#860038", "#fdbb30", "#002d62",
"Dallas Mavericks", "#007dc5", "#c4ced3", "#061922",
"Denver Nuggets", "#4d90cd", "#fdb927", "#0f586c",
"Detroit Pistons", "#ed174c", "#006bb6", "#0f586c",
"Golden State Warriors", "#fdb927", "#006bb6", "#26282a",
"Houston Rockets", "#ce1141", "#c4ced3", "#061922",
"Indiana Pacers", "#ffc633", "#00275d", "#bec0c2",
"Los Angeles Clippers", "#ed174c", "#006bb6", "#061922",
"Los Angeles Lakers", "#fdb927", "#552582", "#061922",
"Memphis Grizzlies", "#0f586c", "#7399c6", "#bed4e9",
"Miami Heat", "#98002e", "#f9a01b", "#061922",
"Milwaukee Bucks", "#00471b", "#f0ebd2", "#061922",
"Minnesota Timberwolves", "#005083", "#00a94f", "#c4ced3",
"New Orleans Pelicans", "#002b5c", "#e31837", "#b4975a",
"New York Knicks", "#006bb6", "#f58426", "#bec0c2",
"Oklahoma City Thunder", "#007dc3", "#f05133", "#fdbb30",
"Orlando Magic", "#007dc5", "#c4ced3", "#061922",
"Philadelphia 76ers", "#ed174c", "#006bb6", "#002b5c",
"Phoenix Suns", "#e56020", "#1d1160", "#63717a",
"Portland Trail Blazers", "#e03a3e", "#bac3c9", "#061922",
"Sacramento Kings", "#724c9f", "#8e9090", "#061922",
"San Antonio Spurs", "#bac3c9", "#061922", NA,
"Toronto Raptors", "#ce1141", "#061922", "#a1a1a4",
"Utah Jazz", "#002b5c", "#f9a01b", "#00471b",
"Washington Wizards", "#002b5c", "#e31837", "#c4ced4"
)
nba_team_cols <- left_join(nba_teams,nba_colors,by="Franchise") %>%
mutate_all(list(~coalesce(.,"white"))) %>%
mutate_at(vars(primary:tertiary),list(~coalesce(.,"grey25")))
# adjust players that switches club and fix names ---
player_tbl <- player_tbl %>%
mutate(Player=str_remove_all(Player,"\\*")) %>%
group_by(Player,year) %>%
mutate(Tm=if_else(Tm=="TOT",lead(Tm,1),Tm),PTS=max(PTS),TRB=max(TRB)) %>%
slice(1) %>%
ungroup()
# cumulative points ----
player_tbl <- player_tbl %>%
arrange(Player,year) %>%
group_by(Player) %>%
mutate(total_pts = cumsum(PTS),total_trb=cumsum(TRB))
# build top ten per year ----
top10pts_fct <- function(x){
player_tbl %>%
dplyr::filter(year<=x) %>%
group_by(Player) %>%
dplyr::summarise(pts=max(total_pts),Tm=last(Tm)) %>%
ungroup() %>%
arrange(-pts) %>%
top_n(20,pts) %>%
mutate(rk=row_number())
}
top10pts_tbl <- map_dfr(1955:2019,top10pts_fct,.id = "year") %>%
mutate(year=as.numeric(year)+1954)
fill_col <- nba_team_cols$primary
names(fill_col) <- nba_team_cols$Tm
frame_col <- nba_team_cols$secondary
names(frame_col) <- nba_team_cols$Tm
no_col_teams <- unique(top10pts_tbl$Tm)[!unique(top10pts_tbl$Tm)%in%names(fill_col)]
no_cols <- rep("grey66",length(no_col_teams))
names(no_cols) <- no_col_teams
fill_col <- c(fill_col,no_cols)
frame_col <- c(frame_col,no_cols)
p <- top10pts_tbl %>%
ggplot(aes(x = -rk,y = pts, group = Player)) +
geom_col(aes(fill=Tm,colour=Tm),size=1.5)+
geom_text(aes(label=paste0(Player," (",Tm,")")),hjust=1.1,family="CM Sans",fontface="bold")+
geom_text(aes(label=scales::comma(pts)),hjust=-0.1,family="CM Sans")+
coord_flip(clip="off",expand=F) +
scale_y_continuous("",breaks = seq(0,35000,5000)) +
scale_fill_manual(values=fill_col)+
scale_colour_manual(values=frame_col)+
theme(panel.grid.major.x=element_line(colour="grey"),
plot.title = element_text(family="CM Sans",face="bold",size=12),
panel.grid.major.y=element_blank(),
panel.background = element_rect(fill="white"),
legend.position = "none",
plot.margin = margin(1,1,1,2,"cm"),
axis.text.y=element_blank(),
axis.text.x=element_blank(),
axis.ticks = element_blank()) +
labs(x="",y="",title="NBA Top Scorers as of {round(frame_time,0)}",
caption="Source: basketball-reference.com\n @schochastics")+
transition_time(year) +
view_follow(fixed_x = T,fixed_y=c(0,NA))
p_score <- animate(p, nframes = 975, fps = 15, end_pause = 100, width = 800, height = 600)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment