Skip to content

Instantly share code, notes, and snippets.

@Sandy4321
Forked from abresler/nba_2014-15_heatmap
Created December 17, 2015 16:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sandy4321/149452597866bc07d8a9 to your computer and use it in GitHub Desktop.
Save Sandy4321/149452597866bc07d8a9 to your computer and use it in GitHub Desktop.
NBA 2014-15 Heatmap
getBREFTeamStatTable <-
function(season_end = 2015, table_name = 'team', date = T) {
packages <-
c('rvest','dplyr','pipeR','RCurl', 'XML','reshape2', 'tidyr', 'magrittr')
lapply(packages, library, character.only = T)
base <-
'http://www.basketball-reference.com/leagues/'
season <-
(season_end - 1) %>%
paste0("-",season_end)
league <-
'NBA'
table_name %<>%
tolower
css_page <-
'#' %>% paste0(table_name)
table <-
table_name %>%
paste('stats', sep = "_")
css_id <-
css_page %>%
paste0(" a")
url <-
base %>%
paste0(league,'_',season_end,".html")
if (table_name == 'standings') {
t <-
url %>%
html %>%
html_table(fill = T)
if (season_end >= 1971) {
df <-
t[2] %>% data.frame %>%
tbl_df
names(df) <-
c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs')
df$conference <-
'Eastern'
df2 <-
t[3] %>%
data.frame %>%
tbl_df
names(df2) <-
c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs')
df2$conference <- 'Western'
df <-
bind_rows(df,df2)
df$playoff_team <-
df$team %>%
grepl('\\*',.)
df$team %<>%
gsub('\\*','',.)
conference_rank <-
df$team %>%
colsplit('\\(',c('team','conference_rank')) %>%
.$conference_rank
df$conference_rank <-
conference_rank %>%
gsub('\\)','',.) %>%
as.numeric
df$team <-
df$team %>%
colsplit('\\(',c('team','conference_rank')) %>%
.$team %>%
Trim
} else{
df <-
t[2] %>%
data.frame %>%
tbl_df
names(df) <-
c('team','wins','losses','win_pct','games_back','pts.g','opp_pts.g','srs')
div <-
df$team %>% grep('Western Division',.) %>%
as.numeric
df$conference <-
''
df[1:div,'conference'] <-
'Eastern'
df[div:nrow(df),'conference'] <-
'Western'
if (grep('\\(',df$team) %>% length > 0) {
conference_rank <-
df$team %>%
colsplit('\\(',c('team','conference_rank')) %>%
.$conference_rank
df$conference_rank <-
conference_rank %>% gsub('\\)','',.) %>%
as.numeric
df$team <-
df$team %>%
colsplit('\\(',c('team','conference_rank')) %>%
.$team %>%
Trim
}
df$playoff_team <-
df$team %>%
grepl('\\*',.)
df$team %<>%
gsub('\\*','',.)
}
df[df$games_back == '—','games_back'] <-
0
df$games_back %<>%
as.numeric
df %<>%
dplyr::filter(team == 'Baltimore Bullets' | !is.na(wins))
df$point_differential <-
df$pts.g - df$opp_pts.g
team_url_stems <-
url %>%
html %>%
xpathSApply(path = '//*[(@id = "all_standings")]//a',xmlAttrs) %>%
unlist
names(team_url_stems) <-
NULL
bref_team_id <-
team_url_stems %>%
gsub('.html|\\/teams|\\/','',.) %>%
gsub(pattern = "[^[:alpha:]]", replacement = "", .)
df <-
data.frame(season,table_name = 'standings', bref_team_id, df) %>%
tbl_df
df$bref_team_season_url <-
'http://www.basketball-reference.com' %>%
paste0(team_url_stems)
}
else{
df <-
url %>% ## get table
html %>%
html_nodes(css_page) %>%
html_table(header = F) %>%
data.frame %>%
tbl_df
if(df$X1[1] == 'Rk') {
names <-
df %>%
dplyr::filter(X1 == "Rk") %>%
as.character
row_of_header <-
'Rk' %>%
grep(x = df$X1) #find where rank is
start <-
row_of_header + 1
df <-
df[start:nrow(df),]
names %<>%
tolower
names(df) <-
names
}
else{
names <-
df %>%
dplyr::filter(X1 == "Rk") %>%
as.character
row_of_header <-
'Rk' %>%
grep(x = df$X1)
start <-
row_of_header + 1
df <-
df[start:nrow(df),]
names %<>%
tolower
names(df) <-
names
}
names(df) %<>%
gsub('\\%|/','\\.',.)
df$rk <-
NULL
table_name_character <-
c('team','arena')
df[,!(df %>>% names) %in% table_name_character] %>>% apply(2, function(x)
gsub('\\,','',x) %>>%
as.numeric(x)) -> df[,!(df %>>% names) %in% table_name_character] #get rid of commas and make numeric
df$playoff_team <-
df$team %>% grepl(pattern = '\\*')
df$team %<>%
gsub('\\*','',.)
rows <-
df %>%
nrow - 1
df <-
df[1:rows,]
xpath <-
paste0("//*[(@id = '",table_name,"')]//a")
##Grab Team Ids
stems <-
url %>% ## get table
html %>%
xpathSApply(xpath,xmlAttrs) %>%
unlist %>%
as.character
bref_team_id <-
stems %>%
gsub('\\/|.html|teams','',.) %>%
gsub(season_end,'',.)
df <-
data.frame(season,table_name = table, bref_team_id, df) %>%
tbl_df
df$bref_team_season_url <-
'http://www.basketball-reference.com' %>%
paste0(stems)
}
if (date == T) {
df$scrape_time <-
Sys.time()
}
return(df)
}
library(d3heatmap)
team2015 <-
getBREFTeamStatTable(season_end = 2015, table_name = 'team')
team2015 %<>%
arrange(team)
data <-
team2015 %>%
select(fg:pts.g) %>%
select(-fg.,-X3p., -X2p., -pts.g, -ft.)
rownames(data) <-
team2015$team
mp <-
team2015$mp
data.per240 <-
data %>%
apply(2, function(x) x / mp * 48 * 5)
row.names(data.per240) <-
team2015$team
unclustered <-
data.per240 %>%
d3heatmap(colors = "RdYlBu", theme = "dark",cluster = F)
clustered <-
data.per240 %>%
d3heatmap(colors = "RdYlBu", theme = "dark")
clustered
unclustered
library(DT)
data.per240 <-
data.per240[,colnames(data.per240)[c(17,1:6,12,11:9, 13:14, 7:8,16)]]
table <-
data.per240 %>%
datatable(colnames =
colnames(data.per240), rownames = TRUE, filter = 'top',
options = list(pageLength = 30, lengthMenu = c(5, 10, 15, 20, 25, 30), dom = 'Rlfrtip',colReorder = list(realtime = TRUE), initComplete = JS(
"function(settings, json) {",
"$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
"}")),
caption = htmltools::tags$caption(
style = 'caption-side: bottom; text-align: center;',
htmltools::em('Team Per 240M Stats')),
escape = FALSE, extensions = c('ColReorder', 'Responsive')
) %>% formatRound(3,columns = 1:30)
table
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment