Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Time series forecasting with H2O.
install.packages("timetk")
install.packages("tidyquant")
library(h2o) # Awesome ML Library
library(timetk) # Toolkit for working with time series in R
library(tidyquant) # Loads tidyverse, financial pkgs, used to get data
beer_sales_tbl <- tq_get("S4248SM144NCEN", get = "economic.data", from = "2010-01-01", to = "2017-10-27")
beer_sales_tbl %>%
ggplot(aes(date, price)) +
# Train Region
annotate("text", x = ymd("2012-01-01"), y = 7000,
color = palette_light()[[1]], label = "Train Region") +
# Validation Region
geom_rect(xmin = as.numeric(ymd("2016-01-01")),
xmax = as.numeric(ymd("2016-12-31")),
ymin = 0, ymax = Inf, alpha = 0.02,
fill = palette_light()[[3]]) +
annotate("text", x = ymd("2016-07-01"), y = 7000,
color = palette_light()[[1]], label = "Validation\nRegion") +
# Test Region
geom_rect(xmin = as.numeric(ymd("2017-01-01")),
xmax = as.numeric(ymd("2017-08-31")),
ymin = 0, ymax = Inf, alpha = 0.02,
fill = palette_light()[[4]]) +
annotate("text", x = ymd("2017-05-01"), y = 7000,
color = palette_light()[[1]], label = "Test\nRegion") +
# Data
geom_line(col = palette_light()[1]) +
geom_point(col = palette_light()[1]) +
geom_ma(ma_fun = SMA, n = 12, size = 1) +
# Aesthetics
theme_tq() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
labs(title = "Beer Sales: 2007 through 2017",
subtitle = "Train, Validation, and Test Sets Shown")
beer_sales_tbl %>% glimpse()
beer_sales_tbl_aug <- beer_sales_tbl %>% tk_augment_timeseries_signature()
beer_sales_tbl_aug %>% glimpse()
beer_sales_tbl_clean <- beer_sales_tbl_aug %>%
select_if(~ !is.Date(.)) %>%
select_if(~ !any(is.na(.))) %>%
mutate_if(is.ordered, ~ as.character(.) %>% as.factor)
beer_sales_tbl_clean %>% glimpse()
train_tbl <- beer_sales_tbl_clean %>% filter(year < 2016)
valid_tbl <- beer_sales_tbl_clean %>% filter(year == 2016)
test_tbl <- beer_sales_tbl_clean %>% filter(year == 2017)
h2o.init()
h2o.no_progress()
train_h2o <- as.h2o(train_tbl)
valid_h2o <- as.h2o(valid_tbl)
test_h2o <- as.h2o(test_tbl)
y <- "price"
x <- setdiff(names(train_h2o), y)
automl_models_h2o <- h2o.automl(
x = x,
y = y,
training_frame = train_h2o,
validation_frame = valid_h2o,
leaderboard_frame = test_h2o,
max_runtime_secs = 60,
stopping_metric = "deviance")
automl_leader <- automl_models_h2o@leader
pred_h2o <- h2o.predict(automl_leader, newdata = test_h2o)
h2o.performance(automl_leader, newdata = test_h2o)
error_tbl <- beer_sales_tbl %>%
filter(lubridate::year(date) == 2017) %>%
add_column(pred = pred_h2o %>% as.tibble() %>% pull(predict)) %>%
rename(actual = price) %>%
mutate(
error = actual - pred,
error_pct = error / actual
)
error_tbl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.