Skip to content

Instantly share code, notes, and snippets.

@bhoung
Created June 30, 2021 12:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bhoung/3833c6afa438296c6c6dfe8964d20da7 to your computer and use it in GitHub Desktop.
Save bhoung/3833c6afa438296c6c6dfe8964d20da7 to your computer and use it in GitHub Desktop.
---
title: Minutes played by Nets, playoffs
layout: post
categories:
  - posts
tags:
  - nba, sports
output:
  #html_document
  md_document:
    variant: markdown_github+backtick_code_blocks
    preserve_yaml: true
    toc: false
    fig_retina: 2
---
```{r setup, include=FALSE}
# Set the knitr default chunk option echo = TRUE; the chunks below also set
# their own include/echo options in their headers.
knitr::opts_chunk$set(echo = TRUE)
```
```{r include=FALSE}
library(rvest)
library(cowplot)
library(tidyverse)
library(xml2)
library(stringi)
```
```{r include=FALSE}
# Read the Nets 2021 team page and collect the game-log links found in its
# tables.
roster <- "https://www.basketball-reference.com/teams/BRK/2021.html"
html <- read_html(roster)
tables <- html_table(html_nodes(html, "table"))
df <- data.frame(tables[1])

# Attributes of every <a> element located inside a <table> on the page.
players <- xml_attrs(xml_find_all(html_nodes(html, "table"), ".//a"))
gamelogs <- unlist(players)

# Distinct attribute values that mention "gamelog".
pgl <- unique(gamelogs[grepl("gamelog", gamelogs)])

# Hand-picked positions within pgl; they depend on the order in which links
# appear on the page. NOTE(review): presumably the players who appeared in the
# playoffs -- confirm against the page if it changes.
playoffs_pl <- pgl[c(1, 2, 3, 4, 6, 9, 10, 14, 12, 15, 18)]
```
```{r include=FALSE}
# Build an absolute basketball-reference URL from a site-relative path.
#
# `path` is appended verbatim after the host prefix (which already ends in
# "/"), so a path with a leading "/" yields a double slash. Vectorised over
# `path`.
get_player_url <- function(path) {
  paste0("https://www.basketball-reference.com/", path)
}
# Absolute game-log URL for each selected player path, as a flat vector.
player_urls <- unlist(lapply(X = playoffs_pl, FUN = get_player_url))
# Download a page and return one of its HTML tables as a data frame.
#
# The page is read as raw text and every <table>...</table> span is pulled out
# with a regular expression before being parsed on its own, so the search runs
# over the whole page text. NOTE(review): presumably this is done because the
# wanted tables sit inside HTML comments (see the references at the end of
# this document) -- confirm.
#
# Args:
#   player_url:  URL (or path) handed to readLines().
#   table_index: position, among all <table> matches in the page text, of the
#                table to return. Defaults to 9, which the original code used
#                as the playoff table.
#
# Returns:
#   A data.frame built from the selected table via html_table().
#
# Stops with an explicit error when the page contains fewer than
# `table_index` tables, instead of failing inside read_html() on an NA.
get_data <- function(player_url, table_index = 9) {
  page_lines <- readLines(player_url)
  src_text <- paste0(page_lines, collapse = "")

  # Non-greedy match so each table is captured separately.
  table_html <- as.character(unlist(stri_extract_all(
    src_text,
    regex = "<table(.*?)/table>",
    omit_no_match = TRUE,
    simplify = TRUE
  )))

  if (table_index < 1 || table_index > length(table_html)) {
    stop(
      "Requested table ", table_index, " but found ", length(table_html),
      " table(s) at ", player_url,
      call. = FALSE
    )
  }

  table_doc <- read_html(table_html[table_index])
  data.frame(html_table(table_doc))
}
```
```{r include=FALSE}
# Trial run on the first player: fetch the table, then add a parsed Date
# column and the minutes played as a difftime in minutes.
player_url <- player_urls[1]
tdf <- get_data(player_url) %>%
  mutate(
    date = as.Date(Date, "%Y-%m-%d"),
    min = as.difftime(MP, format = "%M:%S", units = "mins")
  )
```
```{r include=FALSE}
library(stringr)
# Player id for each URL: the 7th "/"-separated piece of the URL
# (e.g. "duranke01").
plist <- lapply(str_split(player_urls, "/"), function(parts) parts[7])

# Download each player's table and store it in a variable named after the
# player id; the next chunk looks these variables up by name.
for (k in seq_along(player_urls)) {
  assign(plist[[k]], get_data(player_urls[k]))
}
unlist(plist)
```
```{r include=FALSE}
# Stack the per-player tables into one data frame `data`, tagging every row
# with the player id it came from.
#
# Each id in `plist` names a variable holding that player's table. The tables
# are collected first and bound with a single rbind() call, rather than
# growing `data` one rbind() at a time inside a loop.
player_frames <- lapply(plist, function(p) {
  player_log <- get(p)
  player_log$player <- p
  player_log
})
data <- do.call(rbind, player_frames)
```
```{r echo=FALSE}
# Number of rows per player id in the stacked table.
table(data[["player"]])
```
```{r include=FALSE}
library(tidyverse)
# Read the Nets 2021 schedule page; going by the variable names, the first
# table is the regular season and the second is the playoffs.
nets_games_url <- "https://www.basketball-reference.com/teams/BRK/2021_games.html"
html <- read_html(nets_games_url)
tables <- html_table(html_nodes(html, "table"))
nets_regular_season <- data.frame(tables[[1]])
nets_playoffs <- data.frame(tables[[2]])

# Keep the columns needed later and give them readable names in one step.
# Var.6 and Var.8 are the names data.frame() generated for unnamed columns.
games <- nets_playoffs %>%
  select(
    G,
    Date,
    Home = Var.6,
    Opponent,
    WinLoss = Var.8,
    Team = Tm,
    Opp
  )
```
```{r include=FALSE}
# Parse the game date: drop the first five characters of `Date` and read the
# rest as "<month name> <day>, <year>".
# NOTE(review): the dropped prefix is presumably a weekday such as "Sat, " --
# confirm against the scraped table.
# substr() is vectorised, so it is applied to the column directly instead of
# going row by row through apply(), which converts the whole data frame to a
# character matrix first.
games$date <- as.Date(substr(games$Date, 6, 20), format = "%B %d, %Y")
#glimpse(games)

# Scores arrive as text; convert them and compute the point differential
# (team score minus opponent score).
games$Team <- as.integer(games$Team)
games$Opp <- as.integer(games$Opp)
games$diff <- games$Team - games$Opp
```
```{r warning=FALSE, include=FALSE}
# Label each game: "@" in the Home column means AWAY, anything else is HOME.
games$home <- ifelse(games$Home == "@", "AWAY", "HOME")
table(games$home)
```
```{r include=FALSE}
# Attach the player rows to each playoff game by calendar date, then keep the
# columns used for plotting and add minutes played as a difftime in minutes.
data$date <- as.Date(data$Date, format = "%Y-%m-%d")
dfm <- left_join(games, data, by = "date")
mp <- dfm %>%
  select(date, MP, player, WinLoss, diff, home, Opponent, G.x) %>%
  mutate(min = as.difftime(MP, format = "%M:%S", units = "mins"))
```
```{r echo=FALSE, message=FALSE, warning=FALSE}
library(cowplot)
library(ggrepel)
# Replace basketball-reference player ids with surnames for the plot labels.
# Ids not listed here become NA and are excluded from every plot below.
mp <- mp %>% mutate(player = case_when(
  player == "brownbr01" ~ "Brown",
  player == "claxtni01" ~ "Claxton",
  player == "duranke01" ~ "Durant",
  player == "greenje02" ~ "Green",
  player == "griffbl01" ~ "Griffin",
  player == "hardeja01" ~ "Harden",
  player == "harrijo01" ~ "Harris",
  player == "irvinky01" ~ "Irving",
  player == "jamesmi02" ~ "James",
  player == "johnsty01" ~ "Johnson",
  player == "shamela01" ~ "Shamet"
))

# Players plotted separately from the rest of the roster.
reduced_mins <- c("Johnson", "Claxton", "James", "Shamet", "Green")

# Number the games within each opponent/player pair in date order, then drop
# the grouping so later steps work on a plain table.
mp <- mp %>%
  group_by(Opponent, player) %>%
  arrange(date) %>%
  mutate(gm = row_number()) %>%
  ungroup()

# Rows with a known player, split by membership in reduced_mins.
played <- mp %>% filter(!is.na(player))
regular_mins_df <- played %>% filter(!(player %in% reduced_mins))
reduced_mins_df <- played %>% filter(player %in% reduced_mins)

# Stacked columns of min / 5 per game number, one panel per opponent.
plot_minute_cols <- function(df) {
  ggplot(df) +
    geom_col(aes(x = gm, y = (min / 5), colour = player, fill = player)) +
    theme_cowplot() +
    labs(y = "minutes played", colour = "Player", fill = "Player") +
    facet_grid(Opponent ~ .) +
    scale_x_discrete(limits = factor(seq(1, 7)))
}

# Minutes by date, split by result and then by home/away.
ggplot(played) +
  geom_point(aes(x = date, y = min, colour = player)) +
  facet_grid(WinLoss ~ .) +
  theme_cowplot()
ggplot(played) +
  geom_line(aes(x = date, y = min, colour = player)) +
  facet_grid(home ~ .) +
  theme_cowplot()

# Minutes by date for each of the two player groups.
ggplot(regular_mins_df) +
  geom_line(aes(x = date, y = min, colour = player)) +
  theme_cowplot() +
  labs(y = "minutes played")
ggplot(reduced_mins_df) +
  geom_line(aes(x = date, y = min, colour = player)) +
  theme_cowplot() +
  labs(y = "minutes played")

# Minutes by game number for the reduced_mins group, labelled at games 1
# and 5. The limits are given as a factor, matching the other game-number
# plots.
p1 <- ggplot(reduced_mins_df) +
  geom_line(aes(x = gm, y = min, colour = player)) +
  theme_cowplot() +
  labs(y = "minutes played") +
  facet_grid(Opponent ~ .) +
  scale_x_discrete(limits = factor(seq(1, 7))) +
  labs(x = "game") +
  theme(legend.position = "none") +
  geom_text_repel(aes(x = gm, y = min, label = ifelse(gm %in% c(1, 5), player, "")))
p1

p1 <- plot_minute_cols(regular_mins_df)
p2 <- plot_minute_cols(reduced_mins_df)

# Minutes by game number for the remaining players, labelled at game 1.
ggplot(regular_mins_df) +
  geom_line(aes(x = gm, y = min, colour = player)) +
  theme_cowplot() +
  labs(y = "minutes played") +
  facet_grid(Opponent ~ .) +
  scale_x_discrete(limits = factor(seq(1, 7))) +
  theme(legend.position = "none") +
  geom_text_repel(aes(x = gm, y = min, label = ifelse(gm == 1, player, "")))
p1
p2
#plot_grid(p1, p2, nrow=2, ncol=1)
```
References:
https://stackoverflow.com/questions/40616357/how-to-scrape-tables-inside-a-comment-tag-in-html-with-r
https://stackoverflow.com/questions/49057868/extracting-text-within-tags-inside-html-comments-with-beautifulsoup
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment