Skip to content

Instantly share code, notes, and snippets.

@zdk123
Created October 30, 2017 01:35
Show Gist options
  • Save zdk123/d1dfcf3135f351cc80e6b589083a767a to your computer and use it in GitHub Desktop.
Save zdk123/d1dfcf3135f351cc80e6b589083a767a to your computer and use it in GitHub Desktop.
packages <- c('rvest','dplyr','pipeR', 'knitr')
lapply(packages, library, character.only = T)
## CSS selector handed to html_nodes() inside getYear() to pick out the
## table node(s) that get parsed into the game log.
css_page <- "#pgl_basic"
## Download the game-log page for one season and return its table as a tibble.
##
## Args:
##   year: season identifier substituted into the game-log URL; it is coerced
##         with as.character(), so numbers and strings both work
##         (default '2017').
##
## Returns:
##   A tibble holding every table row whose first cell is not 'Rk'. Column
##   names are taken from the table's own header row (the row whose first
##   cell is 'Rk'). Values are left exactly as html_table() parsed them.
##
## Notes:
##   * Reads the global `css_page` to select the table node(s).
##   * Performs a network request through read_html().
##   * Stops with an explicit error when no node matches `css_page` or when
##     the parsed table contains no 'Rk' header row.
getYear <- function(year = '2017') {
  url <- sprintf(
    "https://www.basketball-reference.com/players/j/jamesle01/gamelog/%s",
    as.character(year)
  )

  ## header = FALSE keeps the header row as data (columns named X1, X2, ...),
  ## so the repeated in-table header rows can be detected and removed below.
  tables <- html_table(html_nodes(read_html(url), css_page), header = FALSE)
  if (length(tables) == 0L) {
    stop("no table matching '", css_page, "' found at ", url, call. = FALSE)
  }
  raw <- as_tibble(data.frame(tables, stringsAsFactors = FALSE))

  ## cleanup: the header row is the one whose first cell is 'Rk'
  header_rows <- unique(filter(raw, X1 == 'Rk'))
  if (nrow(header_rows) == 0L) {
    stop("no 'Rk' header row found in table at ", url, call. = FALSE)
  }
  ## Use a plain character vector for the names instead of relying on the
  ## implicit coercion of a one-row data frame.
  col_names <- as.character(unlist(header_rows[1, ], use.names = FALSE))

  lbj <- filter(raw, X1 != 'Rk')
  colnames(lbj) <- col_names
  lbj
}
## Fetch the game log for every season and stack the per-season tables.
seasons <- 2004:2017
lbj_li <- lapply(seasons, getYear)
lbj_df <- do.call('rbind', lbj_li)
## Columns 6 and 8 are (re)named by position.
## NOTE(review): presumably these headers are blank on the scraped page -- confirm.
colnames(lbj_df)[6] <- 'Away'
colnames(lbj_df)[8] <- 'Outcome'

## Pull out point differential: keep the sign character(s) and the digits of
## the Outcome text and parse them as a number. The replacement is an explicit
## empty string rather than a "\\1" backreference to a non-existent group.
outcome <- as.character(lbj_df[['Outcome']])
lbj_df$differential <- as.numeric(
  paste0(gsub("[^-+]", "", outcome), gsub("\\D", "", outcome))
)

## convert dates
lbj_df <- mutate(lbj_df, Date = as.Date(Date))

## Rows played exactly one day after the previous row are the second night of
## a back to back; the row just before each is the first night. The leading 0
## keeps row 1 from ever being a "second" night, so `first` is always >= 1.
day_gap <- c(0, as.numeric(diff(lbj_df$Date), units = "days"))
second <- which(day_gap == 1)
first <- second - 1

firstLoses <- grepl('L', outcome[first])
first_diff <- lbj_df[['differential']][first]
## Plain logical vector with no NA, so indexing `second` never yields NA rows
## when a differential could not be parsed.
biglosses <- firstLoses & !is.na(first_diff) & first_diff <= -20

## Record of the second night of a back to back, after losing by at least 20 on the first night
lbj_df[second[biglosses], 1:8]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment