bmweiner/ts.r

## ts.r
# Time Series Forecasting in R

## Example Datasets
# R Datasets package (e.g. AirPassengers, austres)
# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

# Census Time Series
# https://www.census.gov/econ/currentdata/dbsearch

# Data used for this demo:
# https://www.census.gov/econ/currentdata/dbsearch?program=MRTS&startYear=1992&endYear=2017&categories=441&dataType=SM&geoLevel=US&notAdjusted=1&submit=GET+DATA&releaseScheduleId=

## References
# https://people.duke.edu/~rnau/411home.htm
# http://pkg.robjhyndman.com/forecast/reference/index.html
# http://www.stat.pitt.edu/stoffer/tsa4/

## Other Usefule packages
# library(lubridate)
# library(seasonal)
# library(factoextra)


library(forecast)
library(tseries)

# Read and explore data
setwd(dir="C:\\Users\\bweiner\\Desktop\\example")
df <- read.csv("auto_m.csv", skip = 6, nrows = 300, stringsAsFactors = FALSE)

summary(df)
plot(df$Value, xlab="Month ", ylab="Motor Vehicle and Parts Dealers (1M USD)", type='l')

# Roll-Up / Agglomerate if needed
# data is monthly

# Convert to Time Series class
y <- ts(df$Value, start=1992, frequency=12)  # 25 years, 300 observations 1992 - 2016
plot(y)

# Removing outliers
tsoutliers(y)
tsclean(y)

# Explore Time Series
tsdisplay(y)

# Check if series is autocorrelated (serial correlation exists)
Box.test(y)  # h0: y is independent, e.g. rejected p < 0.05

# check stationarity
kpss.test(y)  #h0: y is stationary
# adf.test(y)

# Decompose Time Series
plot(stl(y, 'periodic'))

# Transforming the series
# BoxCox(y, BoxCox.lambda(y))


# Fit Model
fit <- auto.arima(y)  # takes care of transform, picks model with lowest AIC
# fit <- ets(y)
# fit <- nnetar(y)

fit

# Forecast with model
fcast <- forecast(fit, h=12)
# fcast <- stlm(y)
# fcast <- meanf(y)

plot(fcast)
#lines(residuals(fcast), col="brown")

residuals(fcast)

# View Forecast accuracy
accuracy(fc)

# validate residuals
tsdisplay(fit$residuals)
mean(fit$residuals, na.rm = 1)
Box.test(fit$residuals)
qqnorm(residuals(fit))
qqline(residuals(fit))


# split time series - test / train
split.ts <- function(x, test.size = 0.25){
  if (test.size > 0 & test.size < 1){
    train.obs = round(length(time(x)) * (1 - test.size))
  } else {
    train.obs = length(time(x)) - test.size
  }
  train <- window(x, end=tsp(x)[1] + (train.obs - 1) / tsp(x)[3])
  test <- window(x, start=tsp(x)[1] + train.obs / tsp(x)[3])
  return(list(train=train, test=test))
}

z <- split.ts(y, 12)

fit <- auto.arima(z$train)
fcast <- forecast(fit, 12)
accuracy(fcast, x = z$test)
lines(z$test)
	# Time Series Forecasting in R

	## Example Datasets
	# R Datasets package (e.g. AirPassengers, austres)
	# https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/00Index.html

	# Census Time Series
	# https://www.census.gov/econ/currentdata/dbsearch

	# Data used for this demo:
	# https://www.census.gov/econ/currentdata/dbsearch?program=MRTS&startYear=1992&endYear=2017&categories=441&dataType=SM&geoLevel=US&notAdjusted=1&submit=GET+DATA&releaseScheduleId=

	## References
	# https://people.duke.edu/~rnau/411home.htm
	# http://pkg.robjhyndman.com/forecast/reference/index.html
	# http://www.stat.pitt.edu/stoffer/tsa4/

	## Other Usefule packages
	# library(lubridate)
	# library(seasonal)
	# library(factoextra)


	library(forecast)
	library(tseries)

	# Read and explore data
	setwd(dir="C:\\Users\\bweiner\\Desktop\\example")
	df <- read.csv("auto_m.csv", skip = 6, nrows = 300, stringsAsFactors = FALSE)

	summary(df)
	plot(df$Value, xlab="Month ", ylab="Motor Vehicle and Parts Dealers (1M USD)", type='l')

	# Roll-Up / Agglomerate if needed
	# data is monthly

	# Convert to Time Series class
	y <- ts(df$Value, start=1992, frequency=12) # 25 years, 300 observations 1992 - 2016
	plot(y)

	# Removing outliers
	tsoutliers(y)
	tsclean(y)

	# Explore Time Series
	tsdisplay(y)

	# Check if series is autocorrelated (serial correlation exists)
	Box.test(y) # h0: y is independent, e.g. rejected p < 0.05

	# check stationarity
	kpss.test(y) #h0: y is stationary
	# adf.test(y)

	# Decompose Time Series
	plot(stl(y, 'periodic'))

	# Transforming the series
	# BoxCox(y, BoxCox.lambda(y))


	# Fit Model
	fit <- auto.arima(y) # takes care of transform, picks model with lowest AIC
	# fit <- ets(y)
	# fit <- nnetar(y)

	fit

	# Forecast with model
	fcast <- forecast(fit, h=12)
	# fcast <- stlm(y)
	# fcast <- meanf(y)

	plot(fcast)
	#lines(residuals(fcast), col="brown")

	residuals(fcast)

	# View Forecast accuracy
	accuracy(fc)

	# validate residuals
	tsdisplay(fit$residuals)
	mean(fit$residuals, na.rm = 1)
	Box.test(fit$residuals)
	qqnorm(residuals(fit))
	qqline(residuals(fit))


	# split time series - test / train
	split.ts <- function(x, test.size = 0.25){
	if (test.size > 0 & test.size < 1){
	train.obs = round(length(time(x)) * (1 - test.size))
	} else {
	train.obs = length(time(x)) - test.size
	}
	train <- window(x, end=tsp(x)[1] + (train.obs - 1) / tsp(x)[3])
	test <- window(x, start=tsp(x)[1] + train.obs / tsp(x)[3])
	return(list(train=train, test=test))
	}

	z <- split.ts(y, 12)

	fit <- auto.arima(z$train)
	fcast <- forecast(fit, 12)
	accuracy(fcast, x = z$test)
	lines(z$test)