Skip to content

Instantly share code, notes, and snippets.

@xvzftube
Created May 2, 2023 18:17
Show Gist options
  • Save xvzftube/f5c481d5710b69a0155db0f3d1c2f81e to your computer and use it in GitHub Desktop.
Save xvzftube/f5c481d5710b69a0155db0f3d1c2f81e to your computer and use it in GitHub Desktop.
how to tune with tidymodels
##################################################################################
library(tidyverse)
library(tidymodels)
library(lightgbm)
library(janitor)
library(skimr)
library(bonsai)
library(baguette)
library(doParallel)
# Read the training and test sets and normalise their column names with
# janitor::clean_names(). The training target is replaced by its natural log,
# so anything fit on train_df predicts log(sale_price).
train_df <- "train.csv" %>%
  read_csv() %>%
  clean_names() %>%
  mutate(sale_price = log(sale_price))

test_df <- "test.csv" %>%
  read_csv() %>%
  clean_names()
# Preprocessing recipe shared by the model workflows in this script.
# sale_price is the outcome; every other column of train_df starts as a
# predictor.
SalePrice_recipe <- recipe(sale_price ~ ., data = train_df) %>%
  # Drop columns that should not be used as predictors (the reason for each
  # choice is not recorded in this script).
  step_rm(id, street, utilities) %>%
  step_rm(alley, fireplace_qu, pool_qc, fence, misc_feature) %>%
  # Numeric predictors: log(x + 1), then centre and scale.
  step_log(all_numeric_predictors(), offset = 1) %>%
  step_normalize(all_numeric_predictors()) %>%
  # Nominal predictors: pool levels rarer than 1% and reserve a level for
  # values first seen at prediction time.
  step_other(all_nominal_predictors(), threshold = 0.01) %>%
  step_novel(all_predictors(), -all_numeric()) %>%
  # Fill missing predictor values by k-nearest-neighbour imputation.
  step_impute_knn(all_predictors()) %>%
  # Encode nominal predictors as indicator columns.
  step_dummy(all_nominal_predictors()) %>%
  step_zv(all_predictors()) # remove 0 variance columns
# lightgbm - using another Kagglers params
# Boosted-tree regression on the lightgbm engine. The hyperparameters are
# hard-coded; nothing is tuned in this section.
spec_lightgbm <- boost_tree(
  mode = "regression",
  engine = "lightgbm",
  mtry = 58,
  trees = 294,
  min_n = 40,
  tree_depth = 14,
  learn_rate = 0.0999,
  loss_reduction = 0.000149,
  sample_size = 0.374
)

workflow_lgbm <- workflow() %>%
  add_recipe(SalePrice_recipe) %>%
  add_model(spec_lightgbm)

# The model subsamples rows (sample_size < 1), so the fit is stochastic.
# Fix R's RNG state first so the submission can be regenerated.
# NOTE(review): this assumes the engine derives its seed from R's RNG --
# confirm against the bonsai/lightgbm documentation.
set.seed(2023)
fit_lgbm <- fit(workflow_lgbm, data = train_df)

# train_df stores log(sale_price), so predictions are on the log scale and
# exp() maps them back to prices. Rows are paired with the Ids in
# sample_submission.csv by position, i.e. this assumes that file lists the
# same rows, in the same order, as test.csv.
df_submission <- read_csv("sample_submission.csv") %>%
  transmute(Id = Id, SalePrice = exp(predict(fit_lgbm, test_df)$.pred))

write_csv(df_submission, "submission_boost.csv")
# svm - do my own hyperparameter tuning grid with bootstraps
# Seed the RNG before resampling so the 30 bootstrap splits -- and any tuning
# run on them -- can be reproduced.
set.seed(2023)
boots <- bootstraps(train_df, times = 30)

# RBF-kernel SVM regression on the kernlab engine. cost and rbf_sigma are
# fixed to the values below; to search for them again, switch to the
# commented tune() lines and run the commented tuning code further down.
spec_svm <- svm_rbf(
  mode = "regression",
  engine = "kernlab",
  #cost = tune(),
  cost = 21.4,
  #rbf_sigma = tune()
  rbf_sigma = 0.0000170
)

workflow_svm <- workflow() %>%
  add_recipe(SalePrice_recipe) %>%
  add_model(spec_svm)

# Tuning run, kept for reference (needs the tune() placeholders above).
# If re-enabled, remember to release the cluster with stopCluster(cl).
#all_cores <- parallel::detectCores(logical = FALSE)
#cl <- makePSOCKcluster(all_cores)
#registerDoParallel(cl)
#svm_grid <- tune_grid(
# workflow_svm,
# resamples = boots
#)
#params_svm <- select_best(svm_grid, 'rmse')
#spec_svm <- finalize_model(spec_svm, parameters = params_svm)
#workflow_svm <- workflows::update_model(workflow_svm, spec_svm)
#fit_svm <- fit(workflow_svm, data = train_df)
fit_svm <- fit(workflow_svm, data = train_df)

# train_df stores log(sale_price), so predictions are on the log scale and
# exp() maps them back to prices. Rows are paired with the Ids in
# sample_submission.csv by position, i.e. this assumes that file lists the
# same rows, in the same order, as test.csv.
df_submission <- read_csv("sample_submission.csv") %>%
  transmute(Id = Id, SalePrice = exp(predict(fit_svm, test_df)$.pred))

write_csv(df_submission, "submission_svm.csv")
# lasso
# glmnet linear regression with mixture = 1 and a fixed penalty. To search for
# the penalty again, switch to the commented tune() line and run the
# commented tuning code further down.
spec_lasso <- linear_reg(
  mode = "regression",
  engine = "glmnet",
  #penalty = tune(),
  penalty = 0.00153,
  mixture = 1
)

workflow_lasso <- workflow() %>%
  add_recipe(SalePrice_recipe) %>%
  add_model(spec_lasso)

# Tuning run, kept for reference (needs the tune() placeholder above and the
# `boots` resamples created in the SVM section of this script).
#all_cores <- parallel::detectCores(logical = FALSE)
#cl <- makePSOCKcluster(all_cores)
#registerDoParallel(cl)
#lasso_grid <- tune_grid(
# workflow_lasso,
# resamples = boots
#)
#penalty = 0.00153
#params_lasso <- select_best(lasso_grid, 'rmse')
# NOTE(review): the table below lists cost / rbf_sigma, which are SVM
# parameters rather than a lasso penalty -- it looks like output pasted from
# the SVM tuning run; confirm before relying on it.
# cost rbf_sigma .metric .estimator mean n std_err .config .best .bound
# <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <chr> <dbl> <dbl>
# 1 0.0746 0.00122 rmse standard 0.158 30 0.00156 Preprocessor1_Model08 0.158 0.159
#spec_lasso <- finalize_model(spec_lasso, parameters = params_lasso)
#workflow_lasso <- workflows::update_model(workflow_lasso, spec_lasso)
fit_lasso <- fit(workflow_lasso, data = train_df)

# train_df stores log(sale_price), so predictions are on the log scale and
# exp() maps them back to prices. Rows are paired with the Ids in
# sample_submission.csv by position.
df_submission <- read_csv("sample_submission.csv") %>%
  transmute(Id = Id, SalePrice = exp(predict(fit_lasso, test_df)$.pred))

write_csv(df_submission, "submission_lasso.csv")
# Ensemble submission: the unweighted mean of the three models' predictions,
# averaged after each has been mapped back from the log scale with exp().
# Uses fit_svm, fit_lasso and fit_lgbm from earlier in this script; rows are
# paired with the Ids in sample_submission.csv by position.
df_submission <- read_csv("sample_submission.csv") %>%
  transmute(
    Id = Id,
    SalePrice = (
      exp(predict(fit_svm, test_df)$.pred) +
        exp(predict(fit_lasso, test_df)$.pred) +
        exp(predict(fit_lgbm, test_df)$.pred)
    ) / 3
  )

write_csv(df_submission, "submission_all.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment