Created
June 26, 2020 23:11
-
-
Save mpettis/7d1b351a9d2aad34943c3147f062537c to your computer and use it in GitHub Desktop.
Timeseries forecasting, converting ts objects to real dates, controlling charting, autoplot, etc.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' --- | |
#' title: "Beer example" | |
#' author: "Matt Pettis (matthew.pettis@gmail.com)" | |
#' date: "`r Sys.Date()`" | |
#' output: | |
#' html_document: | |
#' toc: true | |
#' toc_depth: 3 | |
#' code_folding: show | |
#' editor_options: | |
#' chunk_output_type: console | |
#' --- | |
#+message=FALSE,warning=FALSE,results='asis',echo=FALSE,include=FALSE | |
# knitr::opts_chunk$set(echo = TRUE, warning=FALSE) | |
###----------------------------------------------------------------------------- | |
### Setup | |
#+message=FALSE,warning=FALSE,results='asis',echo=FALSE,include=FALSE | |
invisible( | |
lapply( | |
c( | |
"fpp", # This is where the ausbeer timeseries is | |
"zoo", "forecast", | |
"lubridate", "here", "tidyverse" | |
), | |
function(e) {suppressPackageStartupMessages(library(e, character.only = T))})) | |
#' # tslm() ausbeer | |
#' | |
#' The series: | |
#+ | |
ausbeer | |
#' Frequency of ausbeer. Frequency = 4 which, as a `ts` object, makes it assume it is quarterly data. | |
#+ | |
frequency(ausbeer) | |
#' Plot time series | |
#+ | |
autoplot(ausbeer) + | |
labs( | |
title = "Ausbeer vs. Time" | |
) | |
#' tslm() | |
#+ | |
fit_ausbeer_tslm <- tslm(ausbeer ~ trend + season) | |
## Summary of fit model | |
summary(fit_ausbeer_tslm) | |
#' Let's look at what we get in the modeled object with `str()` | |
#+ | |
str(fit_ausbeer_tslm) | |
#' The `fitted.values` are what the model predicts on the training series | |
#+ | |
fit_ausbeer_tslm$fitted.values | |
#' Let's plot it vs the actual training series. It's not a *good* forecast, but it illustrates how to do this. | |
#+ | |
autoplot(ausbeer) + | |
autolayer(fit_ausbeer_tslm$fitted.values, color="red") + | |
labs( | |
title = "Actual and Fitted values vs. Time (actual = black, forecasted = red" | |
) | |
#' Let's forecast out 20 quarters (5 years) | |
#+ | |
fcst_ausbeer_tslm <- forecast(fit_ausbeer_tslm, h=20) | |
#' Look at the forecast summary | |
#+ | |
summary(fcst_ausbeer_tslm) | |
#' Look at the object structure. We want the `mean`, which is the mean of the forecasted values. | |
#+ | |
str(fcst_ausbeer_tslm) | |
#' Plot it | |
#+ | |
autoplot(ausbeer) + | |
autolayer(fit_ausbeer_tslm$fitted.values, color="red") + | |
autolayer(fcst_ausbeer_tslm$mean, color="blue") + | |
labs( | |
title = "Actual, Fitted, and Forecasted values vs. Time (actual = black, fitted = red, forecasted = blue)" | |
) | |
#' However, `autoplot` does a nice default plot of your forecast too. You can just | |
#' feed it the returned object. | |
#' | |
#' It gives you the training series from the supplied `fit` object, and the | |
#' forecast, with mean forecast and confidence intervals. | |
#+ | |
autoplot(fcst_ausbeer_tslm) + | |
labs( | |
title = "Autoplot of the forecasted tslm() object" | |
) | |
#' However, `autoplot` does a nice default plot of your forecast too. You can just | |
#' feed it the returned object. | |
#' | |
#' It gives you the training series from the supplied `fit` object, and the | |
#' forecast, with mean forecast and confidence intervals. | |
#+ | |
autoplot(fcst_ausbeer_tslm) + | |
autolayer(fit_ausbeer_tslm$fitted.values, color="red") + | |
labs( | |
title = "Autoplot of the forecasted tslm() object, added original fitted (red line)" | |
) | |
#' However, sometimes it is nicer to have a data frame or tibble to plot, and have more | |
#' fine-grained control over the plotting aesthetics. | |
#' | |
#' First things: how to convert that decimal year (like 1988.25) to a standard R date object: | |
#+ | |
as.numeric(time(ausbeer)) | |
## date_decimal is in the lubridate package | |
## Returns a datetime stamp | |
## However, I see a rounding error for July, giving you July 2, not July 1 | |
date_decimal(as.numeric(time(ausbeer))) | |
## We can fix that with lubridate's floor_date() function | |
floor_date(date_decimal(as.numeric(time(ausbeer))), "1 quarter") | |
## Convert datetime to date | |
## as_date() in the lubridate package | |
as_date(floor_date(date_decimal(as.numeric(time(ausbeer))), "1 quarter")) | |
## OK, I don't like wrapping functions around each other, I prefer doing the equivalent piping: | |
## It is much easier to talk through. | |
ausbeer %>% | |
time() %>% | |
as.numeric() %>% | |
date_decimal() %>% | |
floor_date("1 quarter") %>% | |
as_date() | |
## The nice thing is, we can turn this into a function as well: | |
ts2qtr <- . %>% | |
time() %>% | |
as.numeric() %>% | |
date_decimal() %>% | |
floor_date("1 quarter") %>% | |
as_date() | |
ts2qtr(ausbeer) | |
## Now, let's make dataframes. First, actual data | |
tibble( | |
date = ts2qtr(ausbeer), | |
actual = as.numeric(ausbeer) | |
) -> | |
df_plot | |
df_plot | |
## Now, let's join on the fitted values | |
df_plot %>% | |
full_join( | |
tibble( | |
date = ts2qtr(fit_ausbeer_tslm$fitted.values), | |
fitted = as.numeric(fit_ausbeer_tslm$fitted.values) | |
), | |
"date" | |
) -> | |
df_plot | |
df_plot | |
## Now, let's join on the forecasted values. They will have missing values for the training range of data. | |
df_plot %>% | |
full_join( | |
tibble( | |
date = ts2qtr(fcst_ausbeer_tslm$mean), | |
forecast = as.numeric(fcst_ausbeer_tslm$mean) | |
), | |
"date" | |
) -> | |
df_plot | |
df_plot | |
tail(df_plot) | |
## OK, now we have a dataframe with a column for dates and columns for the things we want. | |
ggplot(df_plot) + | |
geom_line(aes(x=date, y=actual), color="black") + | |
geom_line(aes(x=date, y=fitted), color="red") + | |
geom_line(aes(x=date, y=forecast), color="blue") + | |
labs( | |
title = "ggplot basic creation of forecast plots" | |
) | |
## Another way I like. This one gives you a legend easer. | |
df_plot %>% | |
pivot_longer(names_to = "name", values_to = "val", -date) -> | |
df_plot_long | |
ggplot(df_plot_long, aes(x=date, y=val, group=name, color=name)) + | |
geom_line() + | |
scale_color_manual(values = c("actual"="black", "fitted"="red", "forecast"="blue")) + | |
labs( | |
title = "ggplot basic creation of forecast plots, long version" | |
) | |
## This means I can get more control over the parts of the plot too | |
ggplot(df_plot) + | |
geom_point(aes(x=date, y=actual), color="black") + | |
geom_line(aes(x=date, y=fitted), color="red") + | |
geom_line(aes(x=date, y=forecast), color="blue") + | |
labs( | |
title = "ggplot basic creation of forecast plots, actuals are points" | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment