Skip to content

Instantly share code, notes, and snippets.

@hliang
Created June 6, 2018 01:52
Show Gist options
  • Save hliang/070cb7ddc0e5f86b49b5ea342c93f1b2 to your computer and use it in GitHub Desktop.
Save hliang/070cb7ddc0e5f86b49b5ea342c93f1b2 to your computer and use it in GitHub Desktop.
time series analysis - demo script
# king death
kings <- scan("http://robjhyndman.com/tsdldata/misc/kings.dat",skip=3)
kings
# store the data in a time series object
kingstimeseries <- ts(kings)
kingstimeseries
# the number of births per month in New York city, from January 1946 to December 1959 (originally collected by Newton)
births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat")
birthstimeseries <- ts(births, frequency=12, start=c(1946,1))
birthstimeseries
# monthly sales for a souvenir shop at a beach resort town in Queensland, Australia, for January 1987-December 1993 (original data from Wheelwright and Hyndman, 1998).
souvenir <- scan("http://robjhyndman.com/tsdldata/data/fancy.dat")
souvenirtimeseries <- ts(souvenir, frequency=12, start=c(1987,1))
souvenirtimeseries
# this time series could probably be described using an additive model, since the random fluctuations in the data are roughly constant in size over time.
plot.ts(kingstimeseries)
# seasonal variation
plot.ts(birthstimeseries)
# the size of the seasonal fluctuations and random fluctuations seem to increase with the level of the time series. we may need to transform the time series in order to get a transformed time series that can be described using an additive model.
plot.ts(souvenirtimeseries)
logsouvenirtimeseries <- log(souvenirtimeseries)
plot.ts(logsouvenirtimeseries)
## Decomposing Time Series
# Decomposing Non-Seasonal Data
library("TTR")
kingstimeseriesSMA3 <- SMA(kingstimeseries,n=3)
plot.ts(kingstimeseriesSMA3)
kingstimeseriesSMA8 <- SMA(kingstimeseries,n=8)
plot.ts(kingstimeseriesSMA8)
# Decomposing Seasonal Data
birthstimeseriescomponents <- decompose(birthstimeseries)
birthstimeseriescomponents$seasonal
plot(birthstimeseriescomponents)
# Seasonally Adjusting
birthstimeseriesseasonallyadjusted <- birthstimeseries - birthstimeseriescomponents$seasonal
plot(birthstimeseriesseasonallyadjusted)
kingtimeseriesdiff1 <- diff(kingstimeseries, differences=1)
plot.ts(kingtimeseriesdiff1)
acf(kingtimeseriesdiff1, lag.max=20)
acf(kingtimeseriesdiff1, lag.max=20, plot=FALSE)
pacf(kingtimeseriesdiff1, lag.max=20) # plot a partial correlogram
pacf(kingtimeseriesdiff1, lag.max=20, plot=FALSE) # get the partial autocorrelation values
auto.arima(kings)
#
#
setwd("~/")
## Monthly Retail Sales - Sporting goods, hobby, book, and music stores
hobby_adj = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=3, nrows=25, row.names=1)
hobby_fac = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=36, nrows=25, row.names=1)
hobby = hobby_adj * hobby_fac # raw value before seasonal adjustment
# rownames(hobby) = hobby[, 1]
hobby = as.vector(t(hobby)); hobby
hobby_ts = ts(hobby, frequency=12, start=c(1992,1))
plot(hobby_ts, main="Monthly Retail Sales - Sporting goods, hobby, book, and music stores", ylab="Million Dollars")
# A seasonal time series consists of a trend component, a seasonal component and an irregular component. Decomposing the time series means separating the time series into these three components: that is, estimating these three components.
hobby_components <- decompose(hobby_ts)
plot(hobby_components$seasonal)
# Seasonally Adjusting
# estimate the seasonal component using "decompose()", and then subtract the seasonal component from the original time series
hobby_ts_seasonallyadjusted <- hobby_ts - hobby_components$seasonal
plot(hobby_ts_seasonallyadjusted)
## Forecasting
# Holt-Winters Exponential Smoothing
hobby_fc_1 = HoltWinters(hobby_ts)
hobby_fc_1
hobby_fc_1$SSE
plot(hobby_fc_1)
hobby_fc_2 = forecast.HoltWinters(hobby_fc_1, h=36)
plot(hobby_fc_2)
acf(hobby_fc_2$residuals, lag.max=36, na.action=na.pass)
Box.test(hobby_fc_2$residuals, lag=36, type="Ljung-Box")
plot.ts(hobby_fc_2$residuals) # make a time plot
plotForecastErrors(hobby_fc_2$residuals) # make a histogram
hist(hobby_fc_2$residuals)
# arima
hobby_arima_fit = auto.arima(hobby)
hobby_fc_3 = forecast.Arima(hobby_arima_fit, h=36)
plot.forecast(hobby_fc_3)
###############################
## Step 0: Prepare Time Series
setwd("~/")
## Monthly Retail Sales - Sporting goods, hobby, book, and music stores
hobby_adj = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=3, nrows=25, row.names=1)
hobby_fac = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=36, nrows=25, row.names=1)
hobby = hobby_adj * hobby_fac # raw value before seasonal adjustment
# rownames(hobby) = hobby[, 1]
hobby = as.vector(t(hobby)); hobby
hobby_ts = ts(hobby, frequency=12, start=c(1992,1))
## Step 1: Visualize the Time Series
plot(hobby_ts)
hobby_ts_components <- decompose(hobby_ts)
plot(hobby_ts_components)
## Step 2: Stationarize the Series
# test for stationary
adf.test(hobby_ts, alternative="stationary", k=0)
## Step 3: Find Optimal Parameters
par(mfrow=c(2,1))
acf(hobby_ts)
pacf(hobby_ts)
par(mfrow=c(1,1))
auto.arima(hobby_ts)
## Step 4: Build ARIMA Model
hobby_autoarima_fit = auto.arima(hobby_ts)
# # manually setting p, d, q
# fit <- arima(hobby_ts, order=c(1, 1, 2), seasonal = list(order = c(2, 1, 2), period = 12))
# fit <- arima(hobby_ts, order=c(0, 1, 2), seasonal = list(order = c(2, 1, 2), period = 12))
## Step 5: Make Predictions
hobby_fc_4 = forecast.Arima(hobby_autoarima_fit, h=12*3)
plot.forecast(hobby_fc_4)
# pred <- predict(fit, n.ahead = 12*3)
# ts.plot(hobby_ts, pred$pred, lty = c(1,3))
# plot(pred$pred)
---
title: "Time Series Analysis Demo"
author: "H. Liang"
date: "April 28, 2017"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
## Data set 1: Monthly Retail Sales - Sporting goods, hobby, book, and music stores
Source: [U.S. Bureau of the Censu](http://www.census.gov/retail/)
### Step 0: Prepare Time Series
Read data and create time series objects:
```{r, echo=FALSE}
library(forecast)
library(tseries)
hobby_adj = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=3, nrows=25, row.names=1)
hobby_fac = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=36, nrows=25, row.names=1)
hobby = hobby_adj * hobby_fac # raw value before seasonal adjustment
hobby = as.vector(t(hobby))
```
```{r, echo=TRUE}
hobby_ts = ts(hobby, frequency=12, start=c(1992,1))
```
### Step 1: Visualize the Time Series
Plot the data:
```{r}
plot(hobby_ts, main="Monthly Retail Sales", ylab="Million Dollars")
```
```{r, echo=FALSE}
hobby_ts_components <- decompose(hobby_ts)
plot(hobby_ts_components)
```
### Step 2: Stationarize the Series
Stationarize the Series if necessary. Then test for stationary:
```{r}
adf.test(hobby_ts, alternative="stationary", k=0)
```
### Step 3: Find Optimal Parameters
check out the acf and pacf plots:
```{r, echo=FALSE}
# par(mfrow=c(2,1))
acf(hobby_ts)
pacf(hobby_ts)
# par(mfrow=c(1,1))
# auto.arima(hobby_ts)
```
### Step 4: Build the Model
Build a model manually, or let `auto.arima()` search for the best one (the search could be slow).
```{r}
hobby_autoarima_fit = auto.arima(hobby_ts)
hobby_autoarima_fit
```
### Step 5: Make Predictions
Predict the future 3 years:
```{r}
hobby_fc_4 = forecast.Arima(hobby_autoarima_fit, h=12*3)
plot.forecast(hobby_fc_4)
```
## Data set 2: User login frequency
We extracted hourly login counts from `last` output. Results of one login node are used, sessions of 5 seconds or shorter are ignored.
```{r, echo=FALSE}
suppressMessages(library(lubridate))
suppressMessages(library(zoo))
suppressWarnings(library(xts))
login_raw = read.table("~/time_series/last_ln0_0426", header=FALSE, stringsAsFactors=FALSE, nrows=35851)
# head(login_raw)
login = login_raw[grep("^\\(00:0[0-5]\\)$", login_raw$V10, invert=TRUE), ] # remove login sessions of 5 seconds or shorter
login$datetime = paste(login$V5, login$V6, login$V7, sep=" ")
login$datetime = strptime(login$datetime, format="%b %e %R")
login2 = aggregate(list(counts=login$datetime),
list(hourofday = cut(login$datetime, "1 hour")),
length)
login2$hourofday = as.POSIXct(strptime(login2$hourofday, format="%Y-%m-%d %H:%M:%S")) # convert to dat/time format (POSIXlt to POSIXct)
# str(login2)
allhours = seq(min(login$datetime), max(login$datetime), by="1 hour") # POSIXct
allhours = data.frame(allhours=floor_date(allhours, unit="hour"))
# str(allhours)
# allhours$allhours = as.POSIXlt(allhours$allhours)
# head(login); dim(login)
# head(login$datetime)
login3 = merge(login2, allhours, by.x="hourofday", by.y="allhours", all=TRUE)
login3$counts[is.na(login3$counts)] = 0
head(login3)
#dim(login3)
#plot(login2$hourofday, login2$x, type="l", xlab="time", ylab="logins")
#plot(login3$hourofday, login3$counts, type="l", xlab="time", ylab="logins")
## convert to zoo boject
login4 = zoo(login3$counts, order.by=login3$hourofday)
login5 = ts(login4, frequency=12, start=c(1992,1))
login_ts = xts(login3$counts, order.by=login3$hourofday)
attr(login_ts, 'frequency') = 24 # set frequency attribute
#str(login_ts)
#
```
### Step 1: Visualize the Time Series
Plot the data:
```{r}
plot(login_ts)
# Decomposing
login_ts_comp <- decompose(as.ts(login_ts))
plot(login_ts_comp)
```
### Step 2: Stationarize the Series
Stationarize the Series if necessary. Then test for stationary:
```{r}
adf.test(login_ts, alternative="stationary", k=0)
```
### Step 3: Find Optimal Parameters
check out the acf and pacf plots:
```{r, echo=FALSE}
# par(mfrow=c(2,1))
acf(login_ts)
pacf(login_ts)
# par(mfrow=c(1,1))
# auto.arima(hobby_ts)
```
### Step 4: Build the Model
Build a model manually, or let `auto.arima()` search for the best one (the search could be slow).
```{r}
#login_fit = auto.arima(login_ts) # auto.arima: ARIMA(1,0,1)(1,0,0)[24] with non-zero mean
#login_fit
login_fit2 <- arima(login_ts, order=c(1, 0, 1), seasonal = list(order = c(1, 0, 0), period = 24))
```
### Step 5: Make Predictions
Predict the future 7 days:
```{r}
#login_fc = forecast.Arima(login_fit, h=24*7)
#plot.forecast(login_fc)
login_fc2 = forecast.Arima(login_fit2, h=24*7)
plot.forecast(login_fc2)
```
### Also Try Exponential Smoothing method
```{r}
login_fit3 <- HoltWinters(login_ts) # Holt-Winters Exponential Smoothing
#login_fit3
plot(login_fit3)
login_fc3 <- forecast.HoltWinters(login_fit3, h=24*7)
plot.forecast(login_fc3)
```
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment