Skip to content

Instantly share code, notes, and snippets.

@abresler
Last active April 14, 2018 16:04
Show Gist options
  • Save abresler/91d6b8ef6d3c25cc7923cc400bab50e0 to your computer and use it in GitHub Desktop.
Save abresler/91d6b8ef6d3c25cc7923cc400bab50e0 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(nbastatR) # devtools::install_github("abresler/nbastatR")
# Player dictionary, keeping only rows that have a season count.
df_player_dict <-
  nbastatR::get_bref_player_dictionary() %>%
  filter(!is.na(countSeasons))

# Award table restricted to Rookie of the Year.
df_roty_winners <-
  nbastatR::get_bref_awards(awards = c("Rookie of the Year"))

# "advanced" and "totals" tables for seasons 1952 through 2018.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas `FALSE` is a reserved word.
all_data <-
  nbastatR::get_bref_players_seasons(
    seasons = 1952:2018,
    tables = c("advanced", "totals"),
    assign_to_environment = FALSE
  )
# Attach each player's rookie-season slug and keep only the rows whose season
# is that rookie season. The join uses the columns both tables share
# (presumably just slugPlayerBREF -- confirm against all_data).
df_all_rookies <-
  all_data %>%
  left_join(df_player_dict %>% select(slugPlayerBREF, slugSeasonRookie)) %>%
  # The comparison is already logical; filter() drops rows where it is NA.
  mutate(isRookie = slugSeason == slugSeasonRookie) %>%
  filter(isRookie) %>%
  arrange(yearSeason)

# Keep rookies with at least 200 total minutes, then replace missing values in
# every numeric column with 0. A plain function is passed instead of funs(),
# which is deprecated in current dplyr, and is.na() is called directly so the
# dot is never the left-hand side of a pipe.
df_all_rookies <-
  df_all_rookies %>%
  filter(minutesTotals >= 200) %>%
  mutate_if(is.numeric, function(x) ifelse(is.na(x), 0, x))

# Convert the two position columns to factors and hand them to asbmisc.
# asbmisc is another proprietary package -- without it you need to turn
# idPosition & groupPosition into dummy variables yourself.
df_all_rookies <-
  df_all_rookies %>%
  mutate_at(c("groupPosition", "idPosition"), factor) %>%
  asbmisc::convert_factors_to_classes()
# Label each rookie season with whether it won Rookie of the Year.
df_all_rookies <-
  df_all_rookies %>%
  left_join(
    df_roty_winners %>%
      mutate(isROTY = TRUE) %>%
      select(slugSeason, slugPlayerBREF, isROTY),
    # Join keys are stated explicitly instead of being inferred from the
    # shared column names.
    by = c("slugSeason", "slugPlayerBREF")
  ) %>%
  # Rows without a match come back from the join with NA in isROTY; those are
  # the non-winners. "TRUE" is listed first so it is the first factor level.
  mutate(isROTY = factor(!is.na(isROTY), levels = c("TRUE", "FALSE"))) %>%
  select(-c(minutes, isRookie))
# Every season except 2018 is used for fitting.
training <-
  df_all_rookies %>%
  filter(yearSeason != 2018)

# The 2018 rows are the ones to score, so the outcome column is removed.
testing <-
  df_all_rookies %>%
  filter(yearSeason == 2018) %>%
  select(-isROTY)

# Model inputs: every numeric column whose name does not start with "year" or
# "id", plus the outcome column isROTY.
input_vars <-
  df_all_rookies %>%
  select_if(is.numeric) %>%
  select(-matches("^year|^id")) %>%
  names() %>%
  append("isROTY")

data_training <- training %>% select(one_of(input_vars))

# testing no longer has isROTY, so it is left out of the request; asking
# one_of() for it only produced an unknown-column warning.
data_testing <- testing %>% select(one_of(setdiff(input_vars, "isROTY")))
# Model dictionary from modelR2; not referenced again in the visible script
# (presumably kept for interactive lookup of model names).
dict_caret <-
  modelR2::dictionary_caret_models()

# Run the three listed models with isROTY as the outcome.
# The argument list ends without a trailing comma: in R a trailing comma passes
# an extra empty argument, which can fail with an unused or empty argument
# error depending on the function's signature.
test_models <-
  modelR2::caret_models(
    data_training = data_training,
    data_testing = data_testing,
    prediction_variable = "isROTY",
    models = c("ranger", "xgbTree", "glmnet_h2o")
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment