|
// Exponential smoother/forecaster with de-seasonalization, based on Holt-Winters. |
|
// This models an unobserved level and trend component in a noisy signal, along with |
|
// optional "seasonal" effects at the level of day-over-day variation. |
|
// The result is a smoothed version of the signal which may be used to forecast |
|
// future values or detect unexpected variation. |
|
// |
|
// Y is the point field to smooth. |
|
// slevel, strend, sdaily are smoothing factors, numbers ranging [0..1]. |
|
// They determine how quickly the smoother adjusts to new values as they arrive. |
|
// Setting a factor to 0 freezes the feature at its initial estimate, |
|
// while setting a factor to 1 causes that feature to depend solely |
|
// on the most recent point. Setting strend or sdaily to null |
|
// removes that feature from the model. |
|
// |
|
// smooth assumes arriving points are equally spaced in time. |
|
// smooth returns the forecast of the current value (using state prior to the |
|
// current value) and also updates to internal state estimates, as an array: |
|
// [value, level, trend, daily] |
|
// level is the estimate of the deseasonalized series value, trend the estimate of the |
|
// deseasonalized series slope. daily is the estimated seasonal factor |
|
// to be used at the next occurrence of the current position in the cycle. |
|
// |
|
// This implementation differs from additive Holt-Winters in its handling of |
|
// seasonality. It independently fits a C1 piecewise cubic curve to |
|
// day-over-day values, and maintains this via gradient descent on prediction error |
|
// rather than as part of a recursive Winters update (the small changes to make this |
|
// a Winters-style update are commented in the code below). |
|
// |
|
import "interpolate.juttle" as interp; |
|
|
|
export reducer smooth(Y, slevel, strend, sdaily) { |
|
var level, trend, day, daily, time, yhat; |
|
var initial = true, n_daily=6; |
|
|
|
function predict() { |
|
return level + trend + day; |
|
} |
|
|
|
function update() { |
|
var err, Wday, i_daily, W, du; |
|
|
|
if (initial) { |
|
yhat = *Y ; |
|
level = 0; |
|
trend = 0; |
|
day = 0; |
|
time = *"time"; |
|
daily = [*Y,*Y,*Y,*Y,*Y,*Y]; |
|
initial = false; |
|
} else { |
|
// error-correction form of Holt-Winters, eg, https://www.otexts.org/fpp |
|
// du = 1 ; // Winters-style: don't scale by interval |
|
du = (*"time" - time) / :hour: ; |
|
time = *"time"; |
|
if (sdaily != null) { |
|
// get the previous daily factors for this time |
|
Wday = interp.project_cubic(*"time", daily, n_daily, :day:); |
|
day = Wday[0]; |
|
i_daily = Wday[1]; |
|
W = Wday[2]; |
|
} |
|
yhat = predict() ; // 1-step forecast from previous state as reducer return value |
|
err = *Y - yhat; |
|
if (sdaily != null) { |
|
// update day and project the change back onto the daily array |
|
// var derr = err ; // Winters-style: recursive update sees level and trend error |
|
var derr = (*Y - day); |
|
var dday = sdaily * derr * du; |
|
day = day + dday; |
|
daily[i_daily ] = daily[i_daily] + dday * W[0]; |
|
daily[(i_daily+1) % n_daily] = daily[(i_daily+1) % n_daily] + dday * W[1]; |
|
daily[(i_daily+2) % n_daily] = daily[(i_daily+2) % n_daily] + dday * W[2]; |
|
daily[(i_daily+3) % n_daily] = daily[(i_daily+3) % n_daily] + dday * W[3]; |
|
} |
|
if (strend != null) { |
|
trend = trend + slevel * strend * err * du; |
|
} |
|
level = level + trend + slevel * err * du; |
|
} |
|
} |
|
function result() { |
|
return [yhat, level, trend, day ]; |
|
} |
|
function reset() { |
|
initial = true; |
|
} |
|
} |
|
|
|
// derivative (d/dt) of a timeseries, approximated |
|
// as a backwards difference of Y vs the time field. |
|
reducer ddt(Y, units) { |
|
var y, time, dy, dt; |
|
var initial = true; |
|
|
|
function update() { |
|
if (initial) { |
|
dy = 0; |
|
y = *Y; |
|
dt = :1s:; |
|
time = *"time"; |
|
initial = false; |
|
} else { |
|
dy = *Y - y ; |
|
y = *Y; |
|
dt = *"time" - time; |
|
time = *"time"; |
|
} |
|
} |
|
function result() { |
|
return dy * Duration.seconds(units) / Duration.seconds(dt); |
|
} |
|
function reset() { |
|
initial = true; |
|
} |
|
} |
|
|
|
// Compute a forecast along with prediction intervals based |
|
// on a smoothed stderr estimate. Y is the field to forecast. z |
|
// is the number of stdandard deviations from mean to include in |
|
// the prediction interval (z = 1.96 is the 97.5 percentile and thus |
|
// a 95% confidence bound, if errors were idependent and normally |
|
// distributed (often untrue)). |
|
// slevel and strend are smoothing factors for Holt. |
|
// |
|
// For a field name Y, this populates the current point with: |
|
// Y_pred: predicted next y value |
|
// Y_level: de-seasonalized prediction |
|
// Y_trend: predicted next slope |
|
// Y_day: predicted next daily seasonal factor |
|
// Y_err: estimated stderr (RMS prediction error over time) |
|
// Y_hi, Y_low: prediction interval bounds |
|
// |
|
export sub forecastPI(Y, z, alpha, slevel, strend, sdaily) { |
|
put state = smooth(Y, slevel, strend, sdaily) |
|
| put err2 = (*Y - state[0]) * (*Y - state[0]) |
|
| put stderr = Math.sqrt(smooth(err2, alpha, null, null)[0]) |
|
| put *(Y+"_pred") = state[0] |
|
| put *(Y+"_level") = state[1] |
|
| put *(Y+"_trend") = state[2] |
|
| put *(Y+"_day") = state[3] |
|
| put *(Y+"_err") = stderr |
|
| put *(Y+"_derr") = ddt(stderr, :hour:) |
|
| put *(Y+"_hi") = state[0] + z * stderr |
|
| put *(Y+"_low") = state[0] - z * stderr |
|
| remove state, err2, stderr |
|
} |
|
|
|
// Data generation utilities for demos: |
|
|
|
sub hourly_cpu(from, to) { |
|
demo cdn metrics 'cpu' -from from -to to -every :m: |
|
-nhosts 4 -dos .5 -dos_dur :5m: -ripple .3 -cpu_alpha 0.8 |
|
| filter host ~ 'sjc*' |
|
| put cpu=value |
|
| remove type, host, pop, value |
|
} |
|
|
|
sub daily_cpu(from, to, every) { |
|
demo cdn metrics 'cpu' -from from -to to -every every |
|
-nhosts 4 -dos 0.3 -dos_dur :30m: -cpu_alpha 0.8 -daily 4 |
|
| filter host ~ 'sjc*' |
|
| put cpu=value |
|
| remove type, host, pop, value |
|
} |
|
|
|
sub bumpy_cpu(from, to) { |
|
demo cdn metrics 'cpu' -from from -to to -every :5m: |
|
-nhosts 4 -dos .3 -dos_dur :15s: -daily 3 |
|
| filter host ~ 'sjc*' |
|
| put cpu=value |
|
| remove type, host, pop, value |
|
} |
|
|
|
// Demo: forecasting for simulated CPU |
|
// |
|
// This demo performs a smoothing forecast including a continuous |
|
// model of repeated day-to-day varition. It displays timeseries of |
|
// the original input, the predicted/smoothed input, a 95% confidence |
|
// interval around this prediction, and the stderr. It also displays |
|
// the prediction components: level, trend and daily variation. |
|
// The seasonal component needs to see a few days' data to settle in. |
|
// |
|
daily_cpu -from :2014-01-01: -to :2014-01-08: -every :10m: |
|
| forecastPI -Y "cpu" -z 1.96 -alpha .5 -slevel .1 -strend null -sdaily .2 |
|
|( keep cpu, cpu_pred, cpu_level, cpu_trend, cpu_day, cpu_err, cpu_derr, time |
|
| split cpu, cpu_pred, cpu_level, cpu_trend, cpu_day, cpu_err, cpu_derr |
|
| @timechart -display.duration :3d: |
|
; keep cpu, cpu_pred, cpu_low, cpu_hi, time |
|
| split cpu, cpu_pred, cpu_low, cpu_hi |
|
| @timechart -display.duration :3d: |
|
) |