Created
December 29, 2011 18:05
-
-
Save zachmayer/1535323 to your computer and use it in GitHub Desktop.
Forecasting S&P 500
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Function to cross-validate a time series. | |
cv.ts <- function(x, FUN, tsControl, xreg=NULL, ...) { | |
#Load required packages | |
stopifnot(is.ts(x)) | |
stopifnot(is.data.frame(xreg) | is.matrix(xreg) | is.null(xreg)) | |
stopifnot(require(forecast)) | |
stopifnot(require(foreach)) | |
stopifnot(require(plyr)) | |
#Load parameters from the tsControl list | |
stepSize <- tsControl$stepSize | |
maxHorizon <- tsControl$maxHorizon | |
minObs <- tsControl$minObs | |
fixedWindow <- tsControl$fixedWindow | |
summaryFunc <- tsControl$summaryFunc | |
preProcess <- tsControl$preProcess | |
#Make sure xreg object is long enough for last set of forecasts | |
if (! is.null(xreg)) { | |
xreg <- as.matrix(xreg) | |
if (nrow(xreg)<length(x)+maxHorizon) { | |
warning('xreg object too short to forecast beyond the length of the time series. | |
Appending NA values to xreg') | |
nRows <- (length(x)+maxHorizon)-nrow(xreg) | |
nCols <- dim(xreg)[2] | |
addRows <- matrix(rep(NA,nCols*nRows),nrow=nRows, ncol=nCols) | |
colnames(addRows) <- colnames(xreg) | |
xreg <- rbind(xreg,addRows) | |
} | |
} | |
#Define additional parameters | |
freq <- frequency(x) | |
n <- length(x) | |
st <- tsp(x)[1]+(minObs-2)/freq | |
#Create a matrix of actual values. | |
#X is the point in time, Y is the forecast horizon | |
#http://stackoverflow.com/questions/8140577/creating-a-matrix-of-future-values-for-a-time-series | |
formatActuals <- function(x,maxHorizon) { | |
actuals <- outer(seq_along(x), seq_len(maxHorizon), FUN="+") | |
actuals <- apply(actuals,2,function(a) x[a]) | |
actuals | |
} | |
actuals <- formatActuals(x,maxHorizon) | |
actuals <- actuals[minObs:(length(x)-1),,drop=FALSE] | |
#Create a list of training windows | |
#Each entry of this list will be the same length, if fixed=TRUE | |
#At each point in time, calculate 'maxHorizon' forecasts ahead | |
steps <- seq(1,(n-minObs),by=stepSize) | |
forcasts <- foreach(i=steps, .combine=rbind, .multicombine=FALSE) %dopar% { | |
if (is.null(xreg)) { | |
if (fixedWindow) { | |
xshort <- window(x, start=st+(i-minObs+1)/freq, end=st+i/freq) | |
} else { | |
xshort <- window(x, end=st + i/freq) | |
} | |
if (preProcess) { | |
if (testObject(lambda)) { | |
stop("Don't specify a lambda parameter when preProcess==TRUE") | |
} | |
stepLambda <- BoxCox.lambda(xshort, method='loglik') | |
xshort <- BoxCox(xshort, stepLambda) | |
} | |
out <- FUN(xshort, h=maxHorizon, ...) | |
if (preProcess) { | |
out <- InvBoxCox(out, stepLambda) | |
} | |
return(out) | |
} else if (! is.null(xreg)) { | |
if (fixedWindow) { | |
xshort <- window(x, start=st+(i-minObs+1)/freq, end=st+i/freq) | |
xregshort <- xreg[((i):(i+minObs-1)),,drop=FALSE] | |
} else { | |
xshort <- window(x, end=st + i/freq) | |
xregshort <- xreg[(1:(i+minObs-1)),,drop=FALSE] | |
} | |
newxreg <- xreg[(i+minObs):(i+minObs-1+maxHorizon),,drop=FALSE] | |
if (preProcess) { | |
if (testObject(lambda)) { | |
stop("Don't specify a lambda parameter when preProcess==TRUE") | |
} | |
stepLambda <- BoxCox.lambda(xshort, method='loglik') | |
xshort <- BoxCox(xshort, stepLambda) | |
} | |
out <- FUN(xshort, h=maxHorizon, | |
xreg=xregshort, newxreg=newxreg, ...) | |
if (preProcess) { | |
out <- InvBoxCox(out, stepLambda) | |
} | |
return(out) | |
} | |
} | |
#Extract the actuals we actually want to use | |
actuals <- actuals[steps,,drop=FALSE] | |
#Accuracy at each horizon | |
out <- data.frame( | |
ldply(1:maxHorizon, | |
function(horizon) { | |
P <- forcasts[,horizon,drop=FALSE] | |
A <- na.omit(actuals[,horizon,drop=FALSE]) | |
P <- P[1:length(A)] | |
P <- na.omit(P) | |
A <- A[1:length(P)] | |
summaryFunc(P,A) | |
} | |
) | |
) | |
#Add average accuracy, across all horizons | |
overall <- colMeans(out) | |
out <- rbind(out,overall) | |
#Add a column for which horizon and output | |
return(data.frame(horizon=c(1:maxHorizon,'All'),out)) | |
} | |
#Summary function for time series cross-validation | |
stopifnot(require(compiler)) | |
testObject <- function(object){ | |
exists(as.character(substitute(object))) | |
} | |
tsSummary <- cmpfun(function(P,A) { | |
data.frame(t(accuracy(P,A))) | |
}) | |
#Forecasting wrappers | |
meanForecast <- cmpfun(function(x,h,...) { | |
require(forecast) | |
meanf(x, h, ..., level=99)$mean | |
}) | |
naiveForecast <- cmpfun(function(x,h,...) { | |
require(forecast) | |
naive(x, h, ..., level=99)$mean | |
}) | |
auto.arimaForecast <- cmpfun(function(x,h,xreg=NULL,newxreg=NULL,...) { | |
require(forecast) | |
fit <- auto.arima(x, xreg=xreg, ...) | |
forecast(fit, h=h, level=99, xreg=newxreg)$mean | |
}) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Setup | |
set.seed(1) | |
library(quantmod) | |
library(forecast) | |
#Load data | |
getSymbols('^GSPC', from='1980-01-01') | |
#Simplify to monthly level | |
GSPC <- to.monthly(GSPC) | |
Data <- as.ts(Cl(GSPC), start=1980) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Setup model cross-validation | |
myControl <- list( minObs=12, | |
stepSize=1, | |
maxHorizon=12, | |
fixedWindow=TRUE, | |
preProcess=FALSE, | |
summaryFunc=tsSummary | |
) | |
#Cross validate 3 models (model 3 is SLOW!) | |
model1 <- cv.ts(Data, meanForecast, myControl) | |
model2 <- cv.ts(Data, naiveForecast, myControl) | |
model3 <- cv.ts(Data, auto.arimaForecast, myControl, ic='bic', trace=TRUE) | |
#Find the RMSE for each model and create a matrix with our results | |
models <- list(model1,model2,model3) | |
models <- lapply(models, function(x) x[1:12,'RMSE']) | |
results <- do.call(cbind,models) | |
colnames(results) <- c('mean','naive','ar') | |
#Order by average RMSE for the 1st 3 months | |
results <- t(results) | |
order <- rowMeans(results[,1:3]) | |
results <- results[order(order),] | |
print(results) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Plot | |
color <- 1:nrow(results) | |
plot(results[1,], col=1, type='l', ylim=c(min(results),max(results))) | |
for (i in color) { | |
lines(results[i,],col=i) | |
} | |
legend("topleft",legend=row.names(results),col=color,lty=1) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Function to cross-validate a time series. | |
cv.ts <- function(x, FUN, tsControl, xreg=NULL, ...) { | |
#Load required packages | |
stopifnot(is.ts(x)) | |
stopifnot(is.data.frame(xreg) | is.matrix(xreg) | is.null(xreg)) | |
stopifnot(require(forecast)) | |
stopifnot(require(foreach)) | |
stopifnot(require(plyr)) | |
#Load parameters from the tsControl list | |
stepSize <- tsControl$stepSize | |
maxHorizon <- tsControl$maxHorizon | |
minObs <- tsControl$minObs | |
fixedWindow <- tsControl$fixedWindow | |
summaryFunc <- tsControl$summaryFunc | |
preProcess <- tsControl$preProcess | |
#Make sure xreg object is long enough for last set of forecasts | |
if (! is.null(xreg)) { | |
xreg <- as.matrix(xreg) | |
if (nrow(xreg)<length(x)+maxHorizon) { | |
warning('xreg object too short to forecast beyond the length of the time series. | |
Appending NA values to xreg') | |
nRows <- (length(x)+maxHorizon)-nrow(xreg) | |
nCols <- dim(xreg)[2] | |
addRows <- matrix(rep(NA,nCols*nRows),nrow=nRows, ncol=nCols) | |
colnames(addRows) <- colnames(xreg) | |
xreg <- rbind(xreg,addRows) | |
} | |
} | |
#Define additional parameters | |
freq <- frequency(x) | |
n <- length(x) | |
st <- tsp(x)[1]+(minObs-2)/freq | |
#Create a matrix of actual values. | |
#X is the point in time, Y is the forecast horizon | |
#http://stackoverflow.com/questions/8140577/creating-a-matrix-of-future-values-for-a-time-series | |
formatActuals <- function(x,maxHorizon) { | |
actuals <- outer(seq_along(x), seq_len(maxHorizon), FUN="+") | |
actuals <- apply(actuals,2,function(a) x[a]) | |
actuals | |
} | |
actuals <- formatActuals(x,maxHorizon) | |
actuals <- actuals[minObs:(length(x)-1),,drop=FALSE] | |
#Create a list of training windows | |
#Each entry of this list will be the same length, if fixed=TRUE | |
#At each point in time, calculate 'maxHorizon' forecasts ahead | |
steps <- seq(1,(n-minObs),by=stepSize) | |
forcasts <- foreach(i=steps, .combine=rbind, .multicombine=FALSE) %dopar% { | |
if (is.null(xreg)) { | |
if (fixedWindow) { | |
xshort <- window(x, start=st+(i-minObs+1)/freq, end=st+i/freq) | |
} else { | |
xshort <- window(x, end=st + i/freq) | |
} | |
if (preProcess) { | |
if (testObject(lambda)) { | |
stop("Don't specify a lambda parameter when preProcess==TRUE") | |
} | |
stepLambda <- BoxCox.lambda(xshort, method='loglik') | |
xshort <- BoxCox(xshort, stepLambda) | |
} | |
out <- FUN(xshort, h=maxHorizon, ...) | |
if (preProcess) { | |
out <- InvBoxCox(out, stepLambda) | |
} | |
return(out) | |
} else if (! is.null(xreg)) { | |
if (fixedWindow) { | |
xshort <- window(x, start=st+(i-minObs+1)/freq, end=st+i/freq) | |
xregshort <- xreg[((i):(i+minObs-1)),,drop=FALSE] | |
} else { | |
xshort <- window(x, end=st + i/freq) | |
xregshort <- xreg[(1:(i+minObs-1)),,drop=FALSE] | |
} | |
newxreg <- xreg[(i+minObs):(i+minObs-1+maxHorizon),,drop=FALSE] | |
if (preProcess) { | |
if (testObject(lambda)) { | |
stop("Don't specify a lambda parameter when preProcess==TRUE") | |
} | |
stepLambda <- BoxCox.lambda(xshort, method='loglik') | |
xshort <- BoxCox(xshort, stepLambda) | |
} | |
out <- FUN(xshort, h=maxHorizon, | |
xreg=xregshort, newxreg=newxreg, ...) | |
if (preProcess) { | |
out <- InvBoxCox(out, stepLambda) | |
} | |
return(out) | |
} | |
} | |
#Extract the actuals we actually want to use | |
actuals <- actuals[steps,,drop=FALSE] | |
#Accuracy at each horizon | |
out <- data.frame( | |
ldply(1:maxHorizon, | |
function(horizon) { | |
P <- forcasts[,horizon,drop=FALSE] | |
A <- na.omit(actuals[,horizon,drop=FALSE]) | |
P <- P[1:length(A)] | |
P <- na.omit(P) | |
A <- A[1:length(P)] | |
summaryFunc(P,A) | |
} | |
) | |
) | |
#Add average accuracy, across all horizons | |
overall <- colMeans(out) | |
out <- rbind(out,overall) | |
#Add a column for which horizon and output | |
return(data.frame(horizon=c(1:maxHorizon,'All'),out)) | |
} | |
#Summary function for time series cross-validation | |
stopifnot(require(compiler)) | |
testObject <- function(object){ | |
exists(as.character(substitute(object))) | |
} | |
tsSummary <- cmpfun(function(P,A) { | |
data.frame(t(accuracy(P,A))) | |
}) | |
#Forecasting wrappers | |
meanForecast <- cmpfun(function(x,h,...) { | |
require(forecast) | |
meanf(x, h, ..., level=99)$mean | |
}) | |
naiveForecast <- cmpfun(function(x,h,...) { | |
require(forecast) | |
naive(x, h, ..., level=99)$mean | |
}) | |
auto.arimaForecast <- cmpfun(function(x,h,xreg=NULL,newxreg=NULL,...) { | |
require(forecast) | |
fit <- auto.arima(x, xreg=xreg, ...) | |
forecast(fit, h=h, level=99, xreg=newxreg)$mean | |
}) | |
#Setup | |
set.seed(1) | |
library(quantmod) | |
library(forecast) | |
#Load data | |
getSymbols('^GSPC', from='1980-01-01') | |
#Simplify to monthly level | |
GSPC <- to.monthly(GSPC) | |
Data <- as.ts(Cl(GSPC), start=1980) | |
#Setup model cross-validation | |
myControl <- list( minObs=12, | |
stepSize=1, | |
maxHorizon=12, | |
fixedWindow=TRUE, | |
preProcess=FALSE, | |
summaryFunc=tsSummary | |
) | |
#Cross validate 3 models (model 3 is SLOW!) | |
model1 <- cv.ts(Data, meanForecast, myControl) | |
model2 <- cv.ts(Data, naiveForecast, myControl) | |
model3 <- cv.ts(Data, auto.arimaForecast, myControl, ic='bic') | |
#Find the RMSE for each model and create a matrix with our results | |
models <- list(model1,model2,model3) | |
models <- lapply(models, function(x) x[1:12,'RMSE']) | |
results <- do.call(cbind,models) | |
colnames(results) <- c('mean','naive','ar') | |
#Order by average RMSE for the 1st 3 months | |
results <- t(results) | |
order <- rowMeans(results[,1:3]) | |
results <- results[order(order),] | |
print(results) | |
#Plot | |
color <- 1:nrow(results) | |
plot(results[1,], col=1, type='l', ylim=c(min(results),max(results))) | |
for (i in color) { | |
lines(results[i,],col=i) | |
} | |
legend("topleft",legend=row.names(results),col=color,lty=1) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment