Skip to content

Instantly share code, notes, and snippets.

@kierisi
Created July 23, 2021 01:42
Show Gist options
  • Save kierisi/b6be49677cf6243135bdc2c1aebb9a5b to your computer and use it in GitHub Desktop.
Save kierisi/b6be49677cf6243135bdc2c1aebb9a5b to your computer and use it in GitHub Desktop.
# setup -------------------------------------------------------------------
library(tidymodels)
library(tidyverse)
library(baguette)
library(janitor)
library(DataExplorer)
library(here)
# Session-wide tidymodels settings.
tidymodels_prefer()
options(tidymodels.dark = TRUE)

# Control object handed to the resampling call further down: log progress,
# and keep the out-of-sample predictions, the workflow, and whatever
# `extract_model` pulls out of each fit.
# NOTE(review): newer tune releases favour extract_fit_engine() over
# extract_model() -- confirm against the installed version.
grid_control <- control_grid(
  verbose = TRUE,
  save_pred = TRUE,
  save_workflow = TRUE,
  extract = extract_model
)
# import and inspect ------------------------------------------------------
# Folder, resolved through here(), that holds this episode's CSV files and
# receives the submission files written later in the script.
path <- "05_lap_02_episode_08"

train <- read_csv(file = here(path, "train.csv"))
test <- read_csv(file = here(path, "test.csv"))

# First look at the training data: column listing, then DataExplorer plots.
train %>% glimpse()
train %>% plot_intro()
train %>% plot_histogram()
train %>% plot_bar()
# wrangling ---------------------------------------------------------------
#' AT MINIMUM RUN THIS:
# Modelling frame for training: cleaned column names, release month/day cast
# to numeric, and only the id, the outcome and the feature columns kept.
train_tidy <- train %>%
  clean_names() %>%
  mutate(across(c(release_month, release_day), as.numeric)) %>%
  select(id, popularity, duration_ms, danceability:release_day)

train_tidy %>% glimpse()

# Same treatment for the test set, which has no `popularity` column selected.
test_tidy <- test %>%
  clean_names() %>%
  mutate(across(c(release_month, release_day), as.numeric)) %>%
  select(id, duration_ms, danceability:release_day)
# Minimal variants: id and release_year only (plus the outcome for training).
train_min <- select(clean_names(train), id, popularity, release_year)
test_min <- select(clean_names(test), id, release_year)
# RNG model ---------------------------------------------------------------
# Baseline "model": submit uniform random numbers as the predicted popularity.
set.seed(406)
# Draw exactly one value per test row. Reading the row count from `test`
# (instead of a literal 9000) keeps bind_cols() below from failing when the
# test file has a different number of rows. The column is named explicitly
# rather than relying on as_tibble()'s handling of a bare vector.
lol <- tibble(value = runif(nrow(test)))
glimpse(lol)
glimpse(test)
# Pair each test id with its random draw and name the column `popularity`.
wtf <- test %>%
  select(id) %>%
  bind_cols(lol) %>%
  rename(popularity = value)
glimpse(wtf)
wtf %>%
  write_csv(here(path, "lolwhy.csv"))
# spend our data ----------------------------------------------------------
# Both resampling schemes are drawn, in this order, from the RNG seeded here.
set.seed(406)

# 5-fold CV.
folds <- vfold_cv(data = train_tidy, v = 5)

# 10-fold CV repeated 5 times.
folds_grid <- vfold_cv(data = train_tidy, v = 10, repeats = 5)
# bagged tree model -------------------------------------------------------
## WTF got the weirdest error?!?!
# Recipe: popularity against every other column, a novel-level step on the
# nominal predictors, and `id` re-tagged with the "id" role.
bb_bt <- recipe(popularity ~ ., data = train_tidy)
bb_bt <- step_novel(bb_bt, all_nominal_predictors())
bb_bt <- update_role(bb_bt, id, new_role = "id")

# Model spec: bagged trees on the rpart engine, regression mode.
bb_bt_spec <- bag_tree(min_n = 10)
bb_bt_spec <- set_engine(bb_bt_spec, "rpart", times = 25)
bb_bt_spec <- set_mode(bb_bt_spec, "regression")

# Bundle the recipe and the model spec into one workflow.
bb_bt_workflow <- workflow()
bb_bt_workflow <- add_recipe(bb_bt_workflow, bb_bt)
bb_bt_workflow <- add_model(bb_bt_workflow, bb_bt_spec)
# Fit the workflow across the 5 folds using the shared control settings,
# then print the collected resampling metrics.
bb_bt_res <- bb_bt_workflow %>%
  fit_resamples(resamples = folds, control = grid_control)

bb_bt_res %>% collect_metrics()
# Refit the workflow on the full training frame, then score the test frame.
bb_bt_fit <- fit(bb_bt_workflow, train_tidy)
bb_bt_pred <- augment(bb_bt_fit, new_data = test_tidy)
glimpse(bb_bt_pred)
# Write only the id and the prediction, using the same two-column layout
# (id, popularity) as the lolwhy.csv submission. The earlier commented-out
# select() targeted a classification column (.pred_1) from a different
# problem; for this regression model augment() puts predictions in `.pred`.
bb_bt_pred %>%
  select(id, popularity = .pred) %>%
  write_csv(here(path, "bb_bt_01.csv"))
# troubleshooting --------------------------------------------------------
#' error message:
#' 1 preprocessor 1/1, model 1/1: Error: indicators should be a character,
#' not a logical.
# NOTE(review): this message may come from mismatched versions of the
# modelling packages rather than from the recipe itself -- confirm by
# updating the tidymodels packages and re-running.
#
# Prep the recipe once, then inspect both the prepped recipe and the
# processed training data. Previously the chain ended at the first glimpse(),
# so juice() was called with no recipe and errored.
bb_bt_prepped <- prep(bb_bt)
glimpse(bb_bt_prepped)
bb_bt_prepped %>%
  juice() %>%
  glimpse()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment