Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
R Markdown notebooks accompanying a YouTube screencast
---
title: "Basketball Reference"
layout: post
categories:
- notebook
tags:
- nba, sports
output:
  html_document
# md_document:
# variant: markdown_github+backtick_code_blocks
# preserve_yaml: true
# toc: false
# fig_retina: 2
---
```{r setup, include=FALSE}
# Set the default chunk option `echo = TRUE` for the chunks of this notebook.
knitr::opts_chunk$set(echo = TRUE)
```
# Compare '21 Brooklyn Nets team performance based on star players
```{r include=FALSE}
library(rvest)
library(cowplot)
library(tidyverse)
library(xml2)
```
```{r}
# Team page used as the starting point for the scrape.
roster <- "https://www.basketball-reference.com/teams/BRK/2021.html"
html <- read_html(roster)

# Parse every <table> on the page and keep the first one as a data frame.
tables <- html_table(html_nodes(html, "table"))
df <- tables[1] %>% data.frame()

# Attributes of every <a> element found inside a table.
players <- xml_attrs(xml_find_all(html_nodes(html, "table"), ".//a"))

# Flatten the attributes and keep the distinct values that mention "gamelog".
gamelogs <- unlist(players)
pgl <- unique(gamelogs[grepl("gamelog", gamelogs)])

# Fixed positions within `pgl`; they depend on the order of links on the page.
playoffs_pl <- pgl[c(1, 2, 3, 4, 6, 9, 10, 14, 12, 15, 18)]
```
```{r }
# Build an absolute basketball-reference.com URL from a site-relative path.
#
# path  Character vector of paths. Each one is appended verbatim after the
#       host's trailing slash, so a path that itself starts with "/" yields a
#       double slash in the result.
#
# Returns a character vector of URLs.
get_player_url <- function(path) {
  paste0("https://www.basketball-reference.com/", path)
}
# Turn every selected path into a full URL, one call per path.
player_urls <- unlist(lapply(X = playoffs_pl, FUN = get_player_url))
# Download a page and return one of its HTML tables as a data frame.
#
# player_url   URL of the page to fetch.
# table_index  1-based position of the wanted table among the page's <table>
#              elements. Defaults to 8, the position this notebook has always
#              used.
#
# Returns the selected table converted with data.frame().
# Stops with an informative error when the page holds fewer than
# `table_index` tables, rather than handing an out-of-range subscript on to
# data.frame().
get_data <- function(player_url, table_index = 8) {
  html <- read_html(player_url)
  tables <- html %>% html_nodes("table") %>% html_table()
  if (length(tables) < table_index) {
    stop(
      "Expected at least ", table_index, " tables at ", player_url,
      " but found ", length(tables),
      call. = FALSE
    )
  }
  tables[table_index] %>% data.frame()
}
```
```{r}
# Fetch the table for every URL and store it in a variable whose name is the
# 7th "/"-separated piece of that URL.
for (p in player_urls) {
  url_pieces <- str_split(p, "/")[[1]]
  pname <- url_pieces[7]
  data <- get_data(p)
  assign(pname, data)
}
```
```{r}
# Collect, for every URL, its 7th "/"-separated piece into a list.
plist <- list()
for (p in player_urls) {
  url_pieces <- str_split(p, "/")[[1]]
  pname <- url_pieces[7]
  plist[[length(plist) + 1]] <- pname
}
# Show the collected names as a plain vector.
unlist(plist)
```
```{r}
# Stack the per-player tables (held in variables named by the entries of
# `plist`) into a single data frame `data`, tagging every row with the name
# it came from in a `player` column.
#
# The tables are bound in one rbind() call instead of growing `data` inside a
# loop, which re-copied the accumulated rows on every iteration and needed a
# manual first-iteration counter.
data <- do.call(
  rbind,
  lapply(unname(plist), function(player_id) {
    player_log <- get(player_id)
    player_log$player <- player_id
    player_log
  })
)
```
```{r}
# Row counts per player before cleaning.
table(data$player)
# Compare the row count without and with the rows whose Rk column holds the
# literal "Rk" (presumably header rows repeated inside the table -- confirm).
nrow(filter(data, Rk != "Rk"))
nrow(data)
# Keep only the rows whose Rk is not "Rk".
data <- filter(data, Rk != "Rk")
```
```{r}
# Team schedule/results page.
nets_games_url <- "https://www.basketball-reference.com/teams/BRK/2021_games.html"
html <- read_html(nets_games_url)
tables <- html_table(html_nodes(html, "table"))

# First and second tables on the page, named after what they are taken to hold.
nets_regular_season <- data.frame(tables[[1]])
nets_playoffs <- data.frame(tables[[2]])

# Keep seven columns of the first table and give them descriptive names.
# Var.6 and Var.8 are names produced by data.frame() for unnamed columns.
games <- nets_regular_season %>%
  select(
    G,
    Date,
    Home = Var.6,
    Opponent,
    WinLoss = Var.8,
    Team = Tm,
    Opp
  )
```
```{r}
# Take characters 6 through 20 of the Date column and parse them with the
# "%B %d, %Y" format. substr() is vectorised, so the column is processed
# directly; a row-wise apply() would first coerce the whole data frame to a
# character matrix just to read this one column.
games$date <- substr(games$Date, 6, 20)
games$date <- as.Date(games$date, format = "%B %d, %Y")
glimpse(games)

# Coerce both score columns to integer, then compute the point margin
# (Team minus Opp). Values that are not numbers become NA.
games$Team <- as.integer(games$Team)
games$Opp <- as.integer(games$Opp)
games$diff <- games$Team - games$Opp
```
```{r warning=FALSE}
# Distribution of the point margin.
hist(games$diff)

# TRUE for rows whose Home column is not "@".
games$home <- games$Home != "@"
table(games$home)

#qplot(games$date, games$diff)
# Margin over time with a smoother, one panel per value of `home`.
ggplot(games, aes(x = date, y = diff)) +
  geom_point() +
  facet_grid(home ~ .) +
  geom_smooth()
```
```{r}
# Parse the game-log dates so they can be matched against `games$date`.
data$date <- as.Date(data$Date, format = "%Y-%m-%d")

# Attach the player rows to each game by date; games without a match keep NA
# in the player columns.
dfm <- left_join(games, data, by = "date")
mp <- select(dfm, date, MP, player, WinLoss, diff)

# Convert the "minutes:seconds" text in MP to a duration in minutes.
mp$min <- as.difftime(mp$MP, format = "%M:%S", units = "mins")

# Rows that actually belong to a player.
played <- filter(mp, !is.na(player))

# Minutes over time, one panel per game result.
ggplot(played) +
  geom_point(aes(x = date, y = min, colour = player)) +
  facet_grid(WinLoss ~ .) +
  theme_cowplot()

# Minutes over time, one panel per player.
ggplot(played) +
  geom_point(aes(x = date, y = min, colour = player)) +
  facet_grid(player ~ .) +
  theme_cowplot() +
  labs(y = "minutes played")

# Same per-player panels, split into wins (left) and losses (right).
p1 <- ggplot(filter(played, WinLoss == "W")) +
  geom_point(aes(x = date, y = min, colour = player)) +
  facet_grid(player ~ .) +
  theme_cowplot() +
  theme(legend.position = "none") +
  labs(y = "minutes played")
p2 <- ggplot(filter(played, WinLoss == "L")) +
  geom_point(aes(x = date, y = min, colour = player)) +
  facet_grid(player ~ .) +
  theme_cowplot()
plot_grid(p1, p2)

# Per-player panels with the game result shown as colour.
ggplot(played) +
  geom_point(aes(x = date, y = min, colour = WinLoss)) +
  facet_grid(player ~ .) +
  theme_cowplot()
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment