Skip to content

Instantly share code, notes, and snippets.

@abresler
Created June 25, 2015 18:59
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save abresler/606f24d4def8eee9d285 to your computer and use it in GitHub Desktop.
Save abresler/606f24d4def8eee9d285 to your computer and use it in GitHub Desktop.
NBA 2014-15 Top 50 Scorer Heatmap
## Custom Basketball Reference Scraper Season Scraper
get_bref_player_season_stats <-
function(season.end, stat_type = c("Advanced","Totals","Per Minute","Per Game"),
team.totals = F , league = 'NBA'){
packages <-
c('rvest','magrittr','dplyr','stringr','tidyr')
lapply(packages, library, character.only = T)
bref_team_base <-
'http://www.basketball-reference.com/leagues/'
bref_base <-
'http://www.basketball-reference.com'
stat_type <-
match.arg(stat_type,choices = c("Advanced",'Totals','Per Minute',"Per Game"))
stat_type %<>%
tolower %>% gsub("\\ ",'_',.)
url <-
paste0(bref_team_base,league,'_',season.end,'_',stat_type,'.html')
css_page <-
paste0('#',stat_type)
css_player <-
'td:nth-child(2) a'
page <-
url %>%
read_html
tables <-
page %>%
html_table(fill = T)
data <-
tables[1] %>%
data.frame %>%
tbl_df
url.player <-
page %>%
html_nodes(css_player) %>%
html_attr('href') %>%
paste0('http://www.basketball-reference.com/',.)
names(data) %<>%
tolower
data %<>%
dplyr::filter(!rk == 'Rk')
data %<>%
dplyr::select(-rk) %>%
rename(id.team = tm, id.pos = pos)
if(team.totals == T){
multi.teams <-
data %>%
dplyr::filter(id.team == "TOT")
one.team <-
data %>%
dplyr::filter(!player %in% multi.teams$player)
data <-
bind_rows(multi.teams,one.team) %>%
arrange(player)
} else{
data %<>%
dplyr::filter(!id.team == 'TOT')
}
numerics <-
data %>%
dplyr::select(-player, -id.pos, -id.team) %>%
names
data[,numerics] %<>%
apply(2, as.numeric)
season.start <-
season.end - 1
id.season <-
season.start %>%
paste0('-',season.end %>% substr(3,4))
data %<>%
mutate(season.end,
id.season,
id.pos = data$id.pos %>% substr(1,2),
id.name.table = stat_type,
url.season = url)
data$player.hof <-
data$player %>%
grepl('\\*',.)
data$player %<>%
gsub('\\*','',.)
if(c('var.20') %in% names(data)){
data %<>%
dplyr::select(-var.20, -var.25)
}
return(data)
}
data15 <-
get_bref_player_season_stats(season.end = 2015,stat_type = "Per Game",team.totals = T)
plot.data <-
data15 %>%
arrange(desc(pts)) %>%
slice(1:50) %>%
select(player,mp:pts, g) %>%
select(player,pts,fga,mp,trb,drb,orb,everything())
row.names(plot.data) <-
plot.data$player
plot <-
plot.data %>%
select(pts:g) %>%
d3heatmap(scale = "column", color = scales::col_quantile("Blues", NULL, 5),
dendrogram = "row", k_row = 3)
plot ## look here
packages <-
c('materializeR', 'htmltools')
# Or beging to style with materializeCSS
#devtools::install_github("timelyportfolio/materializeR")
lapply(packages, library, character.only = T)
plot %<>%
tagList(
materialize(),
tags$h3("NBA 2014-15 Dendogram Heatmap\nTop 50 Players by Points Per Game")
,.) %>% html_print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment