@welch
Last active October 7, 2015
ingredients for the predict proc, in juttle

The Juttle predict proc combines trend, seasonal, and level prediction. Although predict itself is implemented in native JavaScript, many of its components can also be written in Juttle; those Juttle versions are collected here.

export reducer nn(field, alpha, initial=null) {
    // the "level" state-space model (an exponentially weighted moving average)
    var value = initial;
    function update() {
        if (*field != null) {
            if (value == null) {
                value = *field;
            } else {
                value = value + alpha * (*field - value);
            }
        }
    }
    function result() {
        return value;
    }
}
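For readers who don't know Juttle reducers, here is the same recurrence as a plain Python function (an illustrative sketch, not part of predict): each non-null sample pulls the running level toward itself by a fraction alpha, and the first real sample seeds the level.

def ewma(samples, alpha, initial=None):
    # exponentially weighted moving average: the "level" model above.
    # None samples are skipped; the first real sample seeds the level,
    # mirroring the reducer's update().
    value = initial
    out = []
    for x in samples:
        if x is not None:
            value = x if value is None else value + alpha * (x - value)
        out.append(value)
    return out

# ewma([10, 12, 11, None, 13], alpha=0.5)  ->  [10, 11.0, 11.0, 11.0, 12.0]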
sub put_predict(field, tover, sover, every=null) {
    // return the portion of the signal in field predicted by
    // seasonality, trend, and level estimates as P, and the
    // prediction error as E
    put_trend -field field -every every -over tover
    | put detrend = *field - *(field + "_T")
    | put_seasonal -field 'detrend' -over sover -every every
    | put deseas = detrend // - detrend_S
    | put holt = 0 // level('deseas', alpha, 0)
    | put E = deseas - holt
    | put P = *field - E
    //| put -over over E = stats.relMean(E)
    //| (@table; merge)
}
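A rough end-to-end sketch of what this pipeline is doing, in Python for illustration only: remove a trend estimate, remove a per-position seasonal average, smooth what remains with an EWMA level, and report the leftover as the prediction error E and everything explained as P. The straight-line slope fit and per-position means here are simplified stand-ins for put_trend and put_seasonal, not their actual logic.

def decompose(values, period, alpha=0.5):
    # hedged sketch of the put_predict chain: trend, then seasonal, then level.
    # values: equally spaced samples; period: samples per seasonal cycle.
    n = len(values)
    slope = (values[-1] - values[0]) / (n - 1) if n > 1 else 0.0
    trend = [values[0] + slope * i for i in range(n)]
    detrended = [v - t for v, t in zip(values, trend)]

    # seasonal: average of the detrended values at each position in the cycle
    seasonal = []
    for p in range(period):
        bucket = detrended[p::period]
        seasonal.append(sum(bucket) / len(bucket) if bucket else 0.0)
    deseas = [d - seasonal[i % period] for i, d in enumerate(detrended)]

    # level: EWMA of what remains; the residual is the prediction error E,
    # and P is the part of each original value that the model explains
    level, E, P = None, [], []
    for i, d in enumerate(deseas):
        level = d if level is None else level + alpha * (d - level)
        E.append(d - level)
        P.append(values[i] - E[-1])
    return P, E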
function deCalendarize(duration) {
    return Duration.new(Duration.seconds(duration));
}
sub put_seasonal(field, over, every=null) {
    // year-over-year moving average series for each month.
    // this expects an unbatched stream of points, one per month.
    put __over = deCalendarize(over), __every = deCalendarize(every ?? __over / 30)
    | put __bucket = Math.floor((time - Date.quantize(time, __over)) / __every)
    //| put -over 3 * over __Q = percentile(field, [0, 0.25, 0.5, 0.75]), __count = count() by __bucket
    //| put S = __Q[2]
    | put S = level(field, alpha, 0) by __bucket
}
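The idea behind the bucketing, sketched in Python (a hypothetical helper assuming equally spaced samples and a known cycle length): each point lands in a bucket by its position within the seasonal cycle, and each bucket keeps its own exponentially weighted average as the seasonal component S.

def seasonal_estimate(values, period, alpha=0.5):
    # group samples by position within the seasonal cycle (the __bucket above)
    # and keep one exponentially weighted average per position as S.
    level = [None] * period
    S = []
    for i, x in enumerate(values):
        b = i % period
        level[b] = x if level[b] is None else level[b] + alpha * (x - level[b])
        S.append(level[b])
    return S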
input last: duration -default :30 days: -label 'Show this duration:';
import 'stats.juttle' as stats;

export sub live_juttle_runs(dur1=:3M:) {
    read -from (:now: + :1d: - dur1) -space 'prod'
        event = 'run-juttle' AND
        properties_page_currentPage = 'https://app.jut.io/#explorer' AND
        context_ip != '207.141.12.50'
    | reduce -every :h: run_count = count()
    | (@timechart -title "Weekly App Juttle Program Runs" -valueField 'run_count' -display.dataDensity 0; merge)
}
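The reduce step here is just an hourly count. For reference, the equivalent rollup over an in-memory list of event timestamps looks like this (illustrative Python only; the actual filtering and reading happens in the Jut backend, and the datetime list is hypothetical data):

from collections import Counter

def hourly_counts(timestamps):
    # count events per hour, like `reduce -every :h: run_count = count()`.
    # timestamps: iterable of datetime objects for the matching events.
    buckets = Counter(t.replace(minute=0, second=0, microsecond=0) for t in timestamps)
    return sorted(buckets.items())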
const FROM = Date.new(0);
const TO = Date.new(0) + :10 year:;

export sub series(trend=1, season=10, sigma=0.25) {
    // generate a series of monthly values having given trend, seasonality, and noise
    emit -from FROM -to TO -every :w: // do not use calendar intervals!
    | put trend_true = trend, season_true = season, sigma_true = sigma
    | put n = count(), dy = Duration.as(time - Date.new(0), "y"), cycle = Math.sin(dy * 2 * Math.PI)
    | put value = n * trend/12 + sigma * (2 * Math.random() - 1) + season * cycle
    //| put value = value + ((dy > 5) ? 10 : 0)
};
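The same generator, re-implemented in Python for offline experimentation (an approximation: weekly steps with elapsed years computed as 7/365.25 per step, otherwise the same trend, sine cycle, and uniform noise terms as above):

import math, random

def synthetic_series(n_weeks, trend=1, season=10, sigma=0.25):
    # weekly samples with a linear trend, a one-year sine cycle, and uniform
    # noise, mirroring the series sub above (illustrative re-implementation)
    out = []
    for n in range(1, n_weeks + 1):
        dy = n * 7 / 365.25                        # elapsed time in years
        cycle = math.sin(dy * 2 * math.pi)         # one full cycle per year
        noise = sigma * (2 * random.random() - 1)  # uniform in [-sigma, sigma]
        out.append(n * trend / 12 + noise + season * cycle)
    return out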
function deCalendarize(duration) {
    return Duration.new(Duration.seconds(duration));
}
sub put_trend(field, over, every=null) {
    // estimate trailing trend as the median duration-over-duration change of all samples
    // in a window of -over duration and/or point-to-point change (a variant of the Theil-Sen
    // estimator). This can use up to [2 * over] of historic data per point, but can begin producing
    // (noisy) point-to-point estimates after two points.
    //
    // put_trend consumes its input stream, and outputs points every -every with __T as the estimated
    // change over -over, and the trend portion of field as field_T. [field - field_T] is the
    // de-trended series. Additionally, __t0 and __y0 are the time and value of the initial
    // point of the result batch, such that field_T = __y0 + [time - __t0] * __T / __over. (this allows
    // the estimated __T to be joined with and used to de-trend a denser version of the input stream).
    //
    // reject the trend as being 0 if the quartile range Q1...Q3 contains 0 (be pessimistic about
    // trends, as we do not expect them in short-horizon operations data; but they will confound
    // seasonality if not accounted for).
    //
    put __over = deCalendarize(over), __every = deCalendarize(every ?? __over / 30)
    | put __bucket = Math.floor((time - Date.quantize(time, __over)) / __every)
    | put __change_over = delta(field, null) == null ? null : delta(field) * (over / delta(time, :forever:)) by __bucket // change since over ago, if we have historic data
    | put __change = __change_over ?? delta(field, 0) * (__over / delta(time, :forever:)) // sample-to-sample, for startup
    | put -over 2 * over __change = (count() <= 3 || (last(time) - first(time) < 2 * __every)) ? 0 : __change, // moar data, please!!
        __Q = percentile(__change, [0, 0.25, 0.5, 0.75]),
        __t0 = first(time), __y0 = first(field),
        __count = count()
    | put __T = (__Q[1] < 0 && __Q[3] > 0) ? 0 : __Q[2] // ignore trends around 0, else median
    | put *(field + "_T") = __y0 + __T * (time - __t0) / __over // trend portion of field's value
    //| remove __Q, __t0, __y0, __bucket
}
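Stripped of the streaming and windowing mechanics, the decision rule at the heart of put_trend looks roughly like this in Python (a hedged sketch over an already-collected list of per-sample changes, i.e. the __change values above):

import statistics

def robust_trend(changes):
    # decision rule from put_trend: given the per-sample duration-over-duration
    # changes, report 0 when the Q1..Q3 range straddles zero, otherwise report
    # the median change.
    if len(changes) < 4:
        return 0.0                                   # not enough data yet
    q1, q2, q3 = statistics.quantiles(changes, n=4)  # quartiles
    return 0.0 if q1 < 0 < q3 else q2

Vetoing on the quartile range is the pessimistic stance described in the comment above: a slope is only reported when roughly three quarters or more of the observed changes lie on the same side of zero.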