Skip to content

Instantly share code, notes, and snippets.

@xvzftube
Created May 2, 2023 18:16
Show Gist options
  • Save xvzftube/b56545a9ba745ab351da69b6e46bb501 to your computer and use it in GitHub Desktop.
Save xvzftube/b56545a9ba745ab351da69b6e46bb501 to your computer and use it in GitHub Desktop.
Regression on house price example
library(tidyverse)
library(tidymodels)
library(lightgbm)
library(janitor)
library(skimr)
library(bonsai)
library(baguette)
library(doParallel)
library(darknerdthemes)
# Load the training data, normalise the column names with janitor's
# clean_names(), and put the outcome on the log scale; predictions are
# mapped back with exp() further down.
train_df <- mutate(
  clean_names(read_csv("train.csv")),
  sale_price = log(sale_price)
)

# Load the data to predict on, with the same column-name cleaning.
test_df <- clean_names(read_csv("test.csv"))
# Shared preprocessing recipe: log-scale sale_price is the outcome and every
# other column of train_df starts out as a predictor.
SalePrice_recipe <- recipe(sale_price ~ ., data = train_df) |>
  # Drop columns that are not used as predictors.
  step_rm(
    c(id, street, utilities),
    c(alley, fireplace_qu, pool_qc, fence, misc_feature)
  ) |>
  # log(x + 1) on the numeric predictors, then centre and scale them.
  step_log(all_numeric(), -all_outcomes(), offset = 1) |>
  step_normalize(all_numeric(), -all_outcomes()) |>
  # Pool factor levels seen in fewer than 1% of rows into an "other" level.
  step_other(all_nominal(), -all_outcomes(), threshold = 0.01) |>
  # Reserve a level for categories that only show up in new data.
  step_novel(all_predictors(), -all_numeric()) |>
  # Fill in missing predictor values using nearest neighbours.
  step_impute_knn(all_predictors()) |>
  # One-hot style dummy encoding of the remaining nominal predictors.
  step_dummy(all_nominal(), -all_outcomes()) |>
  # Remove zero-variance columns.
  step_zv(all_predictors())
# lightgbm
# Gradient-boosted trees via the lightgbm engine. The hyperparameters are
# fixed constants here (presumably taken from an earlier tuning run that is
# not part of this script).
spec_lightgbm <- boost_tree(
  mode = "regression",
  engine = "lightgbm",
  # ensemble size and learning rate
  trees = 294,
  learn_rate = 0.0999,
  # per-tree complexity
  tree_depth = 14,
  min_n = 40,
  loss_reduction = 0.000149,
  # column / row subsampling
  mtry = 58,
  sample_size = 0.374
)

# Bundle the shared recipe with the model spec, then fit on the training set.
workflow_lgbm <- workflow(
  preprocessor = SalePrice_recipe,
  spec = spec_lightgbm
)
fit_lgbm <- workflow_lgbm |>
  fit(data = train_df)
# svm
# Radial-basis-function support vector regression via kernlab, with fixed
# cost and kernel width.
spec_svm <- svm_rbf(
  engine = "kernlab",
  mode = "regression",
  rbf_sigma = 0.0000170,
  cost = 21.4
)

# Bundle the shared recipe with the model spec, then fit on the training set.
workflow_svm <- workflow(
  preprocessor = SalePrice_recipe,
  spec = spec_svm
)
fit_svm <- workflow_svm |>
  fit(data = train_df)
# lasso
# Penalised linear regression via glmnet; mixture = 1 selects the pure
# L1 (lasso) penalty.
spec_lasso <- linear_reg(
  engine = "glmnet",
  mode = "regression",
  mixture = 1,
  penalty = 0.00153
)

# Bundle the shared recipe with the model spec, then fit on the training set.
workflow_lasso <- workflow(
  preprocessor = SalePrice_recipe,
  spec = spec_lasso
)
fit_lasso <- workflow_lasso |>
  fit(data = train_df)
# Long-format table of actual vs. predicted prices on the training data,
# back on the original price scale (exp() undoes the log applied at load time).
# "combination" is the plain average of the three models' predictions.
df_plot_prep <- tibble(
  sale_price = exp(train_df$sale_price),
  svm = exp(predict(fit_svm, new_data = train_df)$.pred),
  lasso = exp(predict(fit_lasso, new_data = train_df)$.pred),
  lgbm = exp(predict(fit_lgbm, new_data = train_df)$.pred),
  combination = (svm + lasso + lgbm) / 3
) |>
  # One row per (observation, model) so the plot can colour by model.
  pivot_longer(
    cols = c(svm, lasso, lgbm, combination),
    names_to = "model",
    values_to = "predicted"
  )
# Scatter plot of predicted vs. actual sale price, one colour per model.
# The dashed white diagonal marks perfect prediction (predicted == actual).
p <- ggplot(
  df_plot_prep,
  aes(x = sale_price, y = predicted, color = model)
) +
  geom_point(alpha = 0.5) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0); using
  # `size` here triggers a deprecation warning.
  geom_abline(
    intercept = 0,
    slope = 1,
    color = "white",
    linewidth = 1,
    linetype = "dashed"
  ) +
  labs(
    title = "Predicted vs. Actual Sale Price",
    x = "Actual Sale Price",
    y = "Predicted Sale Price"
  ) +
  darknerdthemes::nerd_theme_gruv() +
  scale_color_nerd(palette = "main")

# Write the figure to disk as a 10 x 10 inch PNG at 300 dpi.
ggsave(
  "predicted_vs_actual.png",
  plot = p,
  width = 10,
  height = 10,
  units = "in",
  dpi = 300
)
# Build the submission table: Ids come from sample_submission.csv and
# SalePrice is the average of the three models' test-set predictions, each
# mapped back from the log scale with exp().
# NOTE(review): this assumes the rows of sample_submission.csv are in the same
# order as test.csv - confirm.
df_submission <- read_csv("sample_submission.csv") |>
  transmute(
    Id = Id,
    SalePrice = (
      exp(predict(fit_svm, new_data = test_df)$.pred) +
        exp(predict(fit_lasso, new_data = test_df)$.pred) +
        exp(predict(fit_lgbm, new_data = test_df)$.pred)
    ) / 3
  )

# Write the submission file to disk.
write_csv(df_submission, "submission_all.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment