Skip to content

Instantly share code, notes, and snippets.

@IronistM
Created November 11, 2013 10:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save IronistM/7411127 to your computer and use it in GitHub Desktop.
Save IronistM/7411127 to your computer and use it in GitHub Desktop.
suppressMessages(library(forecast))
data<-read.csv( file('stdin') )
anomaly_detection<-function(data){
seasonality<-48
data_series<-ts(data$count,frequency=seasonality)
train_start<-1 ## train on 1 month of data
train_end<-ifelse(length(data_series)%%2==0,length(data_series)/2,(length(data_series)-1)/2)
test_start<-train_end
test_end<-length(data_series)-1 ## do not include last period of data since it's potentially not complete
# find best model
f_prev<-forecast_model(data_series,test_start,test_end-1,train_start,train_end)
## fit model to every point except last one to check if previous value is an anomaly
## if so, do not use it when forecasting next point
previous<-data_series[test_end]
## ignore last point if it was an anomaly when fitting model
is_anomaly_previous<-ifelse(previous>f_prev$upper[2]|previous<f_prev$lower[2],TRUE,FALSE)
if(is_anomaly_previous){
## if last point was anomalous, set it equal to the point at the previous season
data_series[test_end]<-data_series[test_end-seasonality]
}
## similar call as above, except now data_series is not using the anomalous value
## similar call as above, except now data_series is using the latest value in data_series to forecast
f_cur<-forecast_model(data_series,test_start,test_end,train_start,train_end)
current<-data_series[test_end+1]
is_anomaly_current<-ifelse(current>f_cur$upper[2]|current<f_cur$lower[2],TRUE,FALSE)
return(c(data[test_end+1,1],current,is_anomaly_current))
}
# function to select best fitting forecast model
forecast_model<-function(data_series,test_start,test_end,train_start,train_end){
# train model on training set
arima_model<-auto.arima(data_series[train_start:train_end])
# Set up a HoltWinters model. If the time series isn't long enough, revert to the arima model
HW_model<-tryCatch(HoltWinters(ts(data_series[train_start:train_end],frequency=48)),
error = function (e) arima_model,
warning = function(w) arima_model)
# forecast model on next 6 points
# this is the "testing set"
arima_model_forecast<-forecast(arima_model,h=6)
HW_model_forecast<-forecast(HW_model,h=6)
# compare forecasted values to actual values to determine accuracy
arima_acc<-accuracy(arima_model_forecast,data_series[test_start:(test_start+6)])
HW_acc<-accuracy(HW_model_forecast,data_series[test_start:(test_start+6)])
# model error is average of Root Mean Square Error and Mean Absolute Percentage Error
# select model with lowest level of error
arima_err<-(arima_acc[2,2]+arima_acc[2,5])/2
HW_err<-(HW_acc[2,2]+HW_acc[2,5])/2
# fit model to entire testing set
if(arima_err<=HW_err){
f.model<-auto.arima(data_series[test_start:test_end])
} else {
f.model<-HoltWinters(ts(data_series[test_start:test_end],frequency=48))
}
# forecast next point using best model
f<-forecast(f.model,h=1,level=c(0.8,0.95,0.99))
return(f)
}
cat( anomaly_detection(data), sep="," )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment