Skip to content

Instantly share code, notes, and snippets.

@regisely
Last active April 29, 2022 21:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save regisely/435d4a2d444c44bd83bb8669d163f4f9 to your computer and use it in GitHub Desktop.
Save regisely/435d4a2d444c44bd83bb8669d163f4f9 to your computer and use it in GitHub Desktop.
Submodels need to be refitted on the full dataset before refitting ensembles in modeltime_refit
library(tidyverse)
library(tidymodels)
library(timetk)
library(modeltime)
library(modeltime.resample)
library(modeltime.ensemble)
# Building models and calibrating in test set ----

# Train/test split of m750 with a two-year assessment window.
splits <- time_series_split(
  m750,
  assess     = "2 years",
  cumulative = TRUE
)

# Base recipe shared by every submodel. Each workflow below trims it down
# to the columns its engine needs.
recipe_spec <- recipe(value ~ date, training(splits)) %>%
  # Expand `date` into calendar features.
  step_timeseries_signature(date) %>%
  # Drop the generated columns whose names end in .iso / .xts.
  step_rm(matches("(.iso$)|(.xts$)")) %>%
  # Normalize the numeric index and year features.
  step_normalize(matches("(index.num$)|(_year$)")) %>%
  # Dummy-encode all nominal columns.
  step_dummy(all_nominal()) %>%
  # Add Fourier terms for a 12-period cycle.
  step_fourier(date, K = 1, period = 12)
# ARIMA submodel ----
# Model specification: ARIMA regression on the "auto_arima" engine.
spec_arima <- arima_reg() %>%
  set_engine("auto_arima")

# Recipe variant that removes every predictor except `date`.
recipe_arima <- recipe_spec %>%
  step_rm(all_predictors(), -date)

# Assemble the workflow and fit it on the training portion of `splits`.
wflw_fit_arima <- workflow() %>%
  add_model(spec_arima) %>%
  add_recipe(recipe_arima) %>%
  fit(training(splits))
# Prophet submodel ----
# Model specification: Prophet regression on the "prophet" engine.
spec_prophet <- prophet_reg() %>%
  set_engine("prophet")

# Recipe variant that removes every predictor except `date`.
recipe_prophet <- recipe_spec %>%
  step_rm(all_predictors(), -date)

# Assemble the workflow and fit it on the training portion of `splits`.
wflw_fit_prophet <- workflow() %>%
  add_model(spec_prophet) %>%
  add_recipe(recipe_prophet) %>%
  fit(training(splits))
# Elastic-net submodel ----
# Model specification: linear regression on the "glmnet" engine with fixed
# penalty and mixture values.
spec_glmnet <- linear_reg(
  penalty = 4.36e-6,
  mixture = 0.9
) %>%
  set_engine("glmnet")

# Recipe variant that drops the raw `date` column and keeps the engineered
# features produced by the base recipe.
recipe_glmnet <- recipe_spec %>%
  step_rm(date)

# Assemble the workflow and fit it on the training portion of `splits`.
wflw_fit_glmnet <- workflow() %>%
  add_model(spec_glmnet) %>%
  add_recipe(recipe_glmnet) %>%
  fit(training(splits))
# Collect the three fitted workflows into a single modeltime table.
# NOTE(review): modeltime appears to ship a dataset that is also called
# `m750_models`; this assignment would mask it in the global environment.
# Confirm that is intended.
m750_models <- modeltime_table(
  wflw_fit_arima,
  wflw_fit_prophet,
  wflw_fit_glmnet
)
# Time-series cross-validation plan built from the training data only.
#
# Uses the `splits` object created at the top of this script. The earlier
# version read `m750_splits`, which this script never defines and which
# resolved only through a dataset bundled with a loaded package. Reusing
# `splits` guarantees the resamples come from the same train/test split the
# submodels were fitted on.
resamples_tscv <- training(splits) %>%
  time_series_cv(
    date_var    = date,
    assess      = "2 years",
    initial     = "5 years",
    skip        = "2 years",
    slice_limit = 1
  )
# Fit every submodel on the resamples to obtain the predictions that the
# ensemble is trained on.
submodel_predictions <- modeltime_fit_resamples(
  m750_models,
  resamples = resamples_tscv
)

# Meta-learner: glmnet linear regression with `penalty` and `mixture`
# left as tune() placeholders.
spec_meta_learner <- linear_reg(
  penalty = tune(),
  mixture = tune()
) %>%
  set_engine("glmnet")

# Stacked ensemble built on top of the submodel predictions.
ensemble_fit <- submodel_predictions %>%
  ensemble_model_spec(model_spec = spec_meta_learner)

# Put the ensemble and the individual submodels in one table, then
# calibrate all of them on the held-out test set.
calibration_tbl <- modeltime_table(ensemble_fit) %>%
  combine_modeltime_tables(m750_models) %>%
  modeltime_calibrate(new_data = testing(splits))
## Refitting models in full dataset using modeltime_refit

# Cross-validation plan over the full m750 series, with the same window
# settings as the training-only plan. `date_var` is passed explicitly so
# the call matches the training-only plan and does not depend on
# auto-detection of the date column.
resamples_tscv_full <- m750 %>%
  time_series_cv(
    date_var    = date,
    assess      = "2 years",
    initial     = "5 years",
    skip        = "2 years",
    slice_limit = 1
  )
# Single refit call: every model in the calibrated table is refitted on the
# full data, and the full-data resamples are forwarded for the ensemble.
# This is the call whose ensemble forecast is being reported as off.
refit_tbl <- modeltime_refit(
  calibration_tbl,
  data      = m750,
  resamples = resamples_tscv_full
)

# Two-year-ahead forecast plotted against the actuals (static plot).
refit_tbl %>%
  modeltime_forecast(h = "2 years", actual_data = m750) %>%
  plot_modeltime_forecast(.interactive = FALSE)
#' The ensemble predictions appear to be way off because the submodels are
#' not refitted on the full dataset before the ensemble itself is refitted.

# Workaround 1: redo each stage by hand on the full data.
# Step 1: refit the submodels on all of m750.
m750_models_refit <- modeltime_refit(m750_models, data = m750)

# Step 2: regenerate the resample predictions from the refitted submodels.
submodel_predictions_refit <- modeltime_fit_resamples(
  m750_models_refit,
  resamples = resamples_tscv_full
)

# Step 3: rebuild the ensemble with the same meta-learner specification
# (glmnet linear regression, `penalty` and `mixture` left as tune()).
spec_meta_learner_refit <- linear_reg(
  penalty = tune(),
  mixture = tune()
) %>%
  set_engine("glmnet")

ensemble_refit <- submodel_predictions_refit %>%
  ensemble_model_spec(model_spec = spec_meta_learner_refit)

# Step 4: forecast two years ahead with the ensemble and the refitted
# submodels, and plot without confidence intervals.
modeltime_table(ensemble_refit) %>%
  combine_modeltime_tables(m750_models_refit) %>%
  modeltime_forecast(h = "2 years", actual_data = m750) %>%
  plot_modeltime_forecast(
    .interactive        = FALSE,
    .conf_interval_show = FALSE
  )
#' Alternatively, call modeltime_refit() twice: first without the resamples
#' argument, to refit all submodels, and then with the resamples argument,
#' to refit the ensemble.

# Workaround 2: two consecutive refits of the calibrated table.
refit_tbl_other <- calibration_tbl %>%
  # First pass: no resamples supplied.
  modeltime_refit(data = m750) %>%
  # Second pass: full-data resamples supplied.
  modeltime_refit(data = m750, resamples = resamples_tscv_full)

# Two-year-ahead forecast plotted against the actuals (static plot).
refit_tbl_other %>%
  modeltime_forecast(h = "2 years", actual_data = m750) %>%
  plot_modeltime_forecast(.interactive = FALSE)
#' I guess the most immediate solution is to call modeltime_refit()
#' on model_tbl before calling modeltime_fit_resamples() in the function
#' mdl_time_refit.mdl_time_ensemble_model_spec
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment