Skip to content

Instantly share code, notes, and snippets.

@bmweiner
Last active August 16, 2017 22:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bmweiner/d66e0b19ef5d97aa35d1da5cabadeb6a to your computer and use it in GitHub Desktop.
Save bmweiner/d66e0b19ef5d97aa35d1da5cabadeb6a to your computer and use it in GitHub Desktop.
# Time Series Forecasting in R
## Example Datasets
# R Datasets package (e.g. AirPassengers, austres)
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html
# Census Time Series
# https://www.census.gov/econ/currentdata/dbsearch
# Data used for this demo:
# https://www.census.gov/econ/currentdata/dbsearch?program=MRTS&startYear=1992&endYear=2017&categories=441&dataType=SM&geoLevel=US&notAdjusted=1&submit=GET+DATA&releaseScheduleId=
## References
# https://people.duke.edu/~rnau/411home.htm
# http://pkg.robjhyndman.com/forecast/reference/index.html
# http://www.stat.pitt.edu/stoffer/tsa4/
## Other Usefule packages
# library(lubridate)
# library(seasonal)
# library(factoextra)
library(forecast)
library(tseries)
# Read and explore data
setwd(dir="C:\\Users\\bweiner\\Desktop\\example")
df <- read.csv("auto_m.csv", skip = 6, nrows = 300, stringsAsFactors = FALSE)
summary(df)
plot(df$Value, xlab="Month ", ylab="Motor Vehicle and Parts Dealers (1M USD)", type='l')
# Roll-Up / Agglomerate if needed
# data is monthly
# Convert to Time Series class
y <- ts(df$Value, start=1992, frequency=12) # 25 years, 300 observations 1992 - 2016
plot(y)
# Removing outliers
tsoutliers(y)
tsclean(y)
# Explore Time Series
tsdisplay(y)
# Check if series is autocorrelated (serial correlation exists)
Box.test(y) # h0: y is independent, e.g. rejected p < 0.05
# check stationarity
kpss.test(y) #h0: y is stationary
# adf.test(y)
# Decompose Time Series
plot(stl(y, 'periodic'))
# Transforming the series
# BoxCox(y, BoxCox.lambda(y))
# Fit Model
fit <- auto.arima(y) # takes care of transform, picks model with lowest AIC
# fit <- ets(y)
# fit <- nnetar(y)
fit
# Forecast with model
fcast <- forecast(fit, h=12)
# fcast <- stlm(y)
# fcast <- meanf(y)
plot(fcast)
#lines(residuals(fcast), col="brown")
residuals(fcast)
# View Forecast accuracy
accuracy(fc)
# validate residuals
tsdisplay(fit$residuals)
mean(fit$residuals, na.rm = 1)
Box.test(fit$residuals)
qqnorm(residuals(fit))
qqline(residuals(fit))
# split time series - test / train
split.ts <- function(x, test.size = 0.25){
if (test.size > 0 & test.size < 1){
train.obs = round(length(time(x)) * (1 - test.size))
} else {
train.obs = length(time(x)) - test.size
}
train <- window(x, end=tsp(x)[1] + (train.obs - 1) / tsp(x)[3])
test <- window(x, start=tsp(x)[1] + train.obs / tsp(x)[3])
return(list(train=train, test=test))
}
z <- split.ts(y, 12)
fit <- auto.arima(z$train)
fcast <- forecast(fit, 12)
accuracy(fcast, x = z$test)
lines(z$test)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment