Instantly share code, notes, and snippets.

# welch/README.md Last active Oct 7, 2015

ingredients for the predict proc, in juttle

The Juttle `predict` proc combines trend, seasonal, and level prediction. Although the proc itself is implemented natively in JavaScript, many of its components can also be written in Juttle; those examples are collected here.

 export reducer nn(field, alpha, initial=null) {
     // "Level" state-space model: an exponentially weighted moving average
     // of the field named by `field`.
     //
     //   field   - name of the point field to smooth (read as *field)
     //   alpha   - smoothing weight applied to each new sample's deviation
     //             from the running estimate
     //   initial - starting estimate; when null, the first non-null sample
     //             seeds the estimate
     var estimate = initial;

     function update() {
         // points where the field is null leave the estimate unchanged
         if (*field != null) {
             estimate = (estimate == null)
                 ? *field
                 : estimate + alpha * (*field - estimate);
         }
     }

     function result() {
         return estimate;
     }
 }
 sub put_predict(field, tover, sover, every=null) {
     // return the portion of the signal in field predicted by
     // seasonality, trend, and level estimates as P, and the
     // prediction error as E
     //
     //   field - name of the field to predict (dereferenced as *field)
     //   tover - passed to put_trend as -over
     //   sover - passed to put_seasonal as -over
     //   every - passed to both put_trend and put_seasonal as -every
     //
     // Fields written on each point: detrend, deseas, holt, E, P (plus
     // whatever put_trend and put_seasonal add).
     //
     // NOTE(review): as written, the seasonal and level terms are stubbed
     // out (deseas is a copy of detrend, holt is the constant 0), so E is
     // just the detrended value and P is the trend portion of the field.
     put_trend -field field -every every -over tover
     // put_trend is expected to have written the trend component to <field>_T
     | put detrend = *field - *(field+"_T")
     | put_seasonal -field 'detrend' -over sover -every every
     | put deseas = detrend // - detrend_S
     | put holt = 0//level('deseas', alpha, 0)
     | put E = deseas - holt
     | put P = *field - E
     //| put -over over E = stats.relMean(E)
     // | (@table;merge)
 }
 // Round-trips a duration through Duration.seconds / Duration.new.
 // Presumably this turns a calendar duration (months, years) into a
 // fixed-length one so it can be used in arithmetic -- TODO confirm.
 function deCalendarize(duration) {
     return Duration.new(Duration.seconds(duration));
 }

 sub put_seasonal(field, over, every=null) {
     // year-over-year moving average series for each month.
     // this expects an unbatched stream of points, one per month.
     //
     //   field - name of the field to analyze
     //   over  - length of one seasonal cycle
     //   every - bucket width within a cycle; defaults to __over / 30
     //
     // NOTE(review): this sub looks unfinished. The final put references
     // `alpha` and `level`, neither of which is a parameter or defined in
     // the visible code (the EWMA reducer shown alongside is named `nn`),
     // and it groups by __bucket although the put that assigns __bucket is
     // commented out below.
     put __over = deCalendarize(over), __every = deCalendarize(every ?? __over / 30)
     //| put __bucket = Math.floor((time - Date.quantize(time, __over)) / __every)
     //| put -over 3 * over __Q = percentile(field,[0, 0.25, 0.5, 0.75]), __count = count() by __bucket
     // | put S = __Q
     | put S = level(field,alpha,0) by __bucket
 }
 // NOTE(review): neither the `last` input nor the `stats` import is
 // referenced by the code visible in this file.
 input last: duration -default :30 days: -label 'Show this duration:';
 import 'stats.juttle' as stats;

 // Counts 'run-juttle' events per hour and charts the counts.
 //
 //   dur1 - how far back to read; the read starts at (now + 1 day - dur1)
 //
 // The filter keeps events fired from the explorer page and drops one
 // specific client IP.
 // NOTE(review): the chart title says "Weekly" but the reduce interval is
 // one hour -- confirm which is intended.
 export sub live_juttle_runs(dur1=:3M:) {
     read -from (:now: + :1d: - dur1) -space 'prod' event = 'run-juttle' AND properties_page_currentPage ='https://app.jut.io/#explorer' AND context_ip != '207.141.12.50'
     | reduce -every :h: run_count = count()
     | (@timechart -title "Weekly App Juttle Program Runs" -valueField 'run_count' -display.dataDensity 0; merge)
 }

 // Time range of the synthetic series: ten years starting at Date.new(0).
 const FROM = Date.new(0);
 const TO = Date.new(0)+:10 year:;

 // Synthetic test signal: linear trend + sinusoidal seasonality + noise.
 //
 //   trend  - slope coefficient; contributes n * trend / 12 at the n-th point
 //   season - amplitude of the sine term, whose period is one year
 //   sigma  - scale of the random noise term
 //
 // Each point also carries the generating parameters (trend_true,
 // season_true, sigma_true) alongside n, dy, cycle and value.
 export sub series(trend = 1, season = 10, sigma=0.25) {
     // generate a series of monthly values having given trend, seasonality, and noise
     // NOTE(review): points are emitted weekly, not monthly, while the
     // trend term still divides by 12 -- confirm the intended slope units.
     emit -from FROM -to TO -every :w: // do not use calendar intervals!
     | put trend_true= trend, season_true=season, sigma_true=sigma
     // n: running point count; dy: time since Date.new(0), in years
     | put n = count(), dy = Duration.as(time - Date.new(0), "y"), cycle = Math.sin(dy * 2 * Math.PI)
     | put value = n * trend/12 + sigma * (2 * Math.random() - 1) + season * cycle
     //| put value = value + ((dy > 5) ? 10 : 0)
 };
 // Round-trips a duration through Duration.seconds / Duration.new.
 // Presumably this turns a calendar duration (months, years) into a
 // fixed-length one so it can be used in arithmetic -- TODO confirm.
 function deCalendarize(duration) {
     return Duration.new(Duration.seconds(duration));
 }

 sub put_trend(field, over, every=null) {
     // estimate trailing trend as the median duration-over-duration change of all samples
     // in a window of -over duration and/or point-to-point change (a variant of the Theil-Sen
     // estimator). This can use up to [2 * over] of historic data per point, but can begin producing
     // (noisy) point-to-point estimates after two points.
     //
     //   field - name of the field to analyze
     //   over  - duration over which change is measured
     //   every - bucket width within -over; defaults to __over / 30
     //
     // Each point is annotated with __T, the estimated change per -over, and with
     // <field>_T, the trend portion of the field's value. [field - <field>_T] is the
     // de-trended series. Additionally, __t0 and __y0 are the time and value of the first
     // point of the window, such that <field>_T = __y0 + [time - __t0] * __T / __over. (this
     // allows the estimated __T to be joined with and de-trend a denser version of the input
     // stream).
     //
     // reject the trend as being 0 if the quartile range Q1...Q3 contains 0 (be pessimistic about
     // trends, as we do not expect them in short-horizon operations data; but they will confound
     // seasonality if not accounted for).
     //
     put __over = deCalendarize(over), __every = deCalendarize(every ?? __over / 30)
     // position of this point within its -over cycle, in units of __every
     | put __bucket = Math.floor((time - Date.quantize(time, __over)) / __every)
     // NOTE(review): this put scales by `over` while the next one scales by `__over` -- confirm
     | put __change_over = delta(field,null) == null ? null : delta(field) * (over / delta(time,:forever:)) by __bucket // change since over ago, if we have historic data
     | put __change = __change_over ?? delta(field, 0) * (__over / delta(time,:forever:)) // sample-to-sample, for startup
     | put -over 2 * over
         __change = (count() <= 3 || (last(time) - first(time) < 2 * __every)) ? 0 : __change, // moar data, please!!
         __Q = percentile(__change,[0, 0.25, 0.5, 0.75]),
         __t0=first(time),
         __y0=first(field),
         __count = count()
     // __Q holds [min, Q1, median, Q3]. Index into it: comparing the whole array
     // against 0 could never be true, and would have assigned the array itself to __T.
     | put __T = (__Q[1] < 0 && __Q[3] > 0) ? 0 : __Q[2] // ignore trends around 0, else median
     | put *(field + "_T") = __y0 + __T * (time - __t0) / __over // trend portion of field's value
     //| remove __Q, __t0, __y0, __bucket
 }