# regisely/ensembles_in_modeltime.R

## ensembles_in_modeltime.R
library(tidyverse)
library(tidymodels)
library(timetk)
library(modeltime)
library(modeltime.resample)
library(modeltime.ensemble)

# Building models and calibrating in test set

# Train/test split of m750 with a "2 years" assessment window.
splits <- m750 %>%
  time_series_split(assess = "2 years", cumulative = TRUE)

# Preprocessing shared by every submodel. Each workflow below removes the
# columns it does not want from this common recipe.
recipe_spec <- recipe(value ~ date, data = training(splits)) %>%
  step_timeseries_signature(date) %>%
  step_rm(matches("(.iso$)|(.xts$)")) %>%
  step_normalize(matches("(index.num$)|(_year$)")) %>%
  step_dummy(all_nominal()) %>%
  step_fourier(date, K = 1, period = 12)

# ARIMA submodel: every predictor except `date` is removed from the shared
# recipe before fitting on the training split.
wflw_fit_arima <- workflow() %>%
  add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
  add_model(set_engine(arima_reg(), "auto_arima")) %>%
  fit(training(splits))

# Prophet submodel: same date-only recipe as the ARIMA workflow.
wflw_fit_prophet <- workflow() %>%
  add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
  add_model(set_engine(prophet_reg(), "prophet")) %>%
  fit(training(splits))

# Elastic-net submodel: fitted on the engineered features with the raw
# `date` column removed; penalty and mixture are fixed values.
wflw_fit_glmnet <- workflow() %>%
  add_recipe(recipe_spec %>% step_rm(date)) %>%
  add_model(
    set_engine(linear_reg(penalty = 4.36e-6, mixture = 0.9), "glmnet")
  ) %>%
  fit(training(splits))

# Gather the three fitted workflows into one modeltime table.
m750_models <- modeltime_table(
  wflw_fit_arima, wflw_fit_prophet, wflw_fit_glmnet
)

# Time-series cross-validation plan used to generate the submodel predictions
# that the ensemble is trained on.
#
# The plan is built from the training portion of `splits`, the split object
# created earlier in this script and used to fit the submodels. The original
# code referred to `m750_splits`, a name this script never defines.
resamples_tscv <- training(splits) %>%
  time_series_cv(
    date_var    = date,
    assess      = "2 years",
    initial     = "5 years",
    skip        = "2 years",
    slice_limit = 1
  )

# Fit each submodel on the resamples to obtain its resample predictions.
submodel_predictions <- modeltime_fit_resamples(
  m750_models,
  resamples = resamples_tscv
)

# Stacked ensemble: a glmnet meta-learner whose penalty and mixture are tuned.
ensemble_fit <- ensemble_model_spec(
  submodel_predictions,
  model_spec = set_engine(
    linear_reg(penalty = tune(), mixture = tune()),
    "glmnet"
  )
)

# Put the ensemble first, followed by the submodels, and calibrate the whole
# table on the testing portion of `splits`.
calibration_tbl <- combine_modeltime_tables(
  modeltime_table(ensemble_fit),
  m750_models
) %>%
  modeltime_calibrate(new_data = testing(splits))

## Refitting models in full dataset using modeltime_refit

# Cross-validation plan over the full m750 data, used when refitting the
# ensemble. `date_var` is passed explicitly so this call matches the one
# that builds `resamples_tscv` and does not depend on the date column being
# auto-detected.
resamples_tscv_full <- m750 %>%
  time_series_cv(
    date_var    = date,
    assess      = "2 years",
    initial     = "5 years",
    skip        = "2 years",
    slice_limit = 1
  )

# Single refit call on the calibrated table, passing the full-data resamples.
# This is the call whose ensemble forecast is reported as being way off.
refit_tbl <- modeltime_refit(
  calibration_tbl,
  data      = m750,
  resamples = resamples_tscv_full
)

# Forecast "2 years" ahead and plot against the actual series.
refit_tbl %>%
  modeltime_forecast(actual_data = m750, h = "2 years") %>%
  plot_modeltime_forecast(.interactive = FALSE)

#' It seems that the ensemble predictions are way off because the submodels
#' are not being refitted on the full dataset before the ensemble is refitted.

# Manual workaround, step 1: refit only the submodels on the full m750 data.
m750_models_refit <- modeltime_refit(m750_models, data = m750)

# Step 2: regenerate the resample predictions from the refitted submodels.
submodel_predictions_refit <- modeltime_fit_resamples(
  m750_models_refit,
  resamples = resamples_tscv_full
)

# Step 3: build the ensemble again on top of the new resample predictions.
ensemble_refit <- ensemble_model_spec(
  submodel_predictions_refit,
  model_spec = set_engine(
    linear_reg(penalty = tune(), mixture = tune()),
    "glmnet"
  )
)

# Forecast with the rebuilt ensemble plus the refitted submodels. This table
# is not passed through modeltime_calibrate(), and the plot is drawn with
# confidence intervals switched off.
combine_modeltime_tables(
  modeltime_table(ensemble_refit),
  m750_models_refit
) %>%
  modeltime_forecast(actual_data = m750, h = "2 years") %>%
  plot_modeltime_forecast(.conf_interval_show = FALSE, .interactive = FALSE)

#' Or, alternatively, you can call modeltime_refit two times, one without the
#' resamples argument, to refit all submodels, and one with the resamples
#' argument to refit the ensemble.

# Two-pass refit: the first call has no `resamples` argument, the second
# passes the full-data resamples.
refit_tbl_other <- calibration_tbl %>%
  modeltime_refit(data = m750) %>%
  modeltime_refit(data = m750, resamples = resamples_tscv_full)

# Forecast "2 years" ahead and plot against the actual series.
refit_tbl_other %>%
  modeltime_forecast(actual_data = m750, h = "2 years") %>%
  plot_modeltime_forecast(.interactive = FALSE)

#' I guess the most immediate solution is to call modeltime_refit()
#' on model_tbl before calling modeltime_fit_resamples() in the function
#' mdl_time_refit.mdl_time_ensemble_model_spec.
	library(tidyverse)
	library(tidymodels)
	library(timetk)
	library(modeltime)
	library(modeltime.resample)
	library(modeltime.ensemble)

	# Building models and calibrating in test set

	# Train/test split of m750 with a "2 years" assessment window.
	splits <- time_series_split(m750, assess = "2 years", cumulative = TRUE)

	# Preprocessing shared by every submodel.
	# The alternation in the two matches() patterns must be a plain `|`:
	# writing it as a backslash followed by `|` inside an R string literal is
	# an unrecognized escape sequence and stops the file from parsing.
	recipe_spec <- recipe(value ~ date, training(splits)) %>%
	  step_timeseries_signature(date) %>%
	  step_rm(matches("(.iso$)|(.xts$)")) %>%
	  step_normalize(matches("(index.num$)|(_year$)")) %>%
	  step_dummy(all_nominal()) %>%
	  step_fourier(date, K = 1, period = 12)

	# ARIMA submodel: every predictor except `date` is removed from the shared
	# recipe before fitting on the training split.
	wflw_fit_arima <- workflow() %>%
	  add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
	  add_model(set_engine(arima_reg(), "auto_arima")) %>%
	  fit(training(splits))

	# Prophet submodel: same date-only recipe as the ARIMA workflow.
	wflw_fit_prophet <- workflow() %>%
	  add_recipe(recipe_spec %>% step_rm(all_predictors(), -date)) %>%
	  add_model(set_engine(prophet_reg(), "prophet")) %>%
	  fit(training(splits))

	# Elastic-net submodel: fitted on the engineered features with the raw
	# `date` column removed; penalty and mixture are fixed values.
	wflw_fit_glmnet <- workflow() %>%
	  add_recipe(recipe_spec %>% step_rm(date)) %>%
	  add_model(
	    set_engine(linear_reg(penalty = 4.36e-6, mixture = 0.9), "glmnet")
	  ) %>%
	  fit(training(splits))

	# Gather the three fitted workflows into one modeltime table.
	m750_models <- modeltime_table(
	  wflw_fit_arima, wflw_fit_prophet, wflw_fit_glmnet
	)

	# Time-series cross-validation plan used to generate the submodel
	# predictions that the ensemble is trained on.
	#
	# The plan is built from the training portion of `splits`, the split object
	# created earlier in this script and used to fit the submodels. The original
	# code referred to `m750_splits`, a name this script never defines.
	resamples_tscv <- training(splits) %>%
	  time_series_cv(
	    date_var    = date,
	    assess      = "2 years",
	    initial     = "5 years",
	    skip        = "2 years",
	    slice_limit = 1
	  )

	# Fit each submodel on the resamples to obtain its resample predictions.
	submodel_predictions <- modeltime_fit_resamples(
	  m750_models,
	  resamples = resamples_tscv
	)

	# Stacked ensemble: a glmnet meta-learner whose penalty and mixture are
	# tuned.
	ensemble_fit <- ensemble_model_spec(
	  submodel_predictions,
	  model_spec = set_engine(
	    linear_reg(penalty = tune(), mixture = tune()),
	    "glmnet"
	  )
	)

	# Put the ensemble first, followed by the submodels, and calibrate the
	# whole table on the testing portion of `splits`.
	calibration_tbl <- combine_modeltime_tables(
	  modeltime_table(ensemble_fit),
	  m750_models
	) %>%
	  modeltime_calibrate(new_data = testing(splits))

	## Refitting models in full dataset using modeltime_refit

	# Cross-validation plan over the full m750 data, used when refitting the
	# ensemble. `date_var` is passed explicitly so this call matches the one
	# that builds `resamples_tscv` and does not depend on the date column
	# being auto-detected.
	resamples_tscv_full <- m750 %>%
	  time_series_cv(
	    date_var    = date,
	    assess      = "2 years",
	    initial     = "5 years",
	    skip        = "2 years",
	    slice_limit = 1
	  )

	# Single refit call on the calibrated table, passing the full-data
	# resamples. This is the call whose ensemble forecast is reported as being
	# way off.
	refit_tbl <- modeltime_refit(
	  calibration_tbl,
	  data      = m750,
	  resamples = resamples_tscv_full
	)

	# Forecast "2 years" ahead and plot against the actual series.
	refit_tbl %>%
	  modeltime_forecast(actual_data = m750, h = "2 years") %>%
	  plot_modeltime_forecast(.interactive = FALSE)

	#' It seems that the ensemble predictions are way off because the submodels
	#' are not being refitted on the full dataset before the ensemble is refitted.

	# Manual workaround, step 1: refit only the submodels on the full m750 data.
	m750_models_refit <- modeltime_refit(m750_models, data = m750)

	# Step 2: regenerate the resample predictions from the refitted submodels.
	submodel_predictions_refit <- modeltime_fit_resamples(
	  m750_models_refit,
	  resamples = resamples_tscv_full
	)

	# Step 3: build the ensemble again on top of the new resample predictions.
	ensemble_refit <- ensemble_model_spec(
	  submodel_predictions_refit,
	  model_spec = set_engine(
	    linear_reg(penalty = tune(), mixture = tune()),
	    "glmnet"
	  )
	)

	# Forecast with the rebuilt ensemble plus the refitted submodels. This
	# table is not passed through modeltime_calibrate(), and the plot is drawn
	# with confidence intervals switched off.
	combine_modeltime_tables(
	  modeltime_table(ensemble_refit),
	  m750_models_refit
	) %>%
	  modeltime_forecast(actual_data = m750, h = "2 years") %>%
	  plot_modeltime_forecast(.conf_interval_show = FALSE, .interactive = FALSE)

	#' Or, alternatively, you can call modeltime_refit two times, one without the
	#' resamples argument, to refit all submodels, and one with the resamples
	#' argument to refit the ensemble.

	# Two-pass refit: the first call has no `resamples` argument, the second
	# passes the full-data resamples.
	refit_tbl_other <- calibration_tbl %>%
	  modeltime_refit(data = m750) %>%
	  modeltime_refit(data = m750, resamples = resamples_tscv_full)

	# Forecast "2 years" ahead and plot against the actual series.
	refit_tbl_other %>%
	  modeltime_forecast(actual_data = m750, h = "2 years") %>%
	  plot_modeltime_forecast(.interactive = FALSE)

	#' I guess the most immediate solution is to call modeltime_refit()
	#' on model_tbl before calling modeltime_fit_resamples() in the function
	#' mdl_time_refit.mdl_time_ensemble_model_spec.