Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Z-score based anomaly detection
NODE context
DESCRIPTION >
Based on https://blog.tinybird.co/2021/06/24/anomaly-detection/
SQL >
%
SELECT
sum(qty) qty,
toStartOfInterval(utc_date, INTERVAL {{Int32(interval_duration, 10)}} second) date
FROM
metrics_ds
WHERE
utc_date between now() - interval {{Int32(minutes, 30)}} minute and now()
GROUP BY
date
ORDER BY
date
NODE context_stats
SQL >
%
SELECT
avg(qty) avg_qty,
stddevPop(qty) std
FROM (
with (select (avg(qty), stddevPop(qty)) std FROM minutes) as ss
SELECT * FROM minutes WHERE qty < ss.1 + 1.5 * ss.2
)
NODE anomalies
SQL >
%
SELECT
date,
(qty - avg_qty) / std as zscore,
qty,
multiIf(qty == 0, 'error', zscore < -1 * {{Float32(zscore, 2)}}, 'error', 'ok') anomaly_low,
multiIf(qty == 0, 'error', zscore > {{Float32(zscore, 2)}}, 'error', 'ok') anomaly_high
FROM (
SELECT
avg(qty) qty,
date
FROM context
WHERE
date between now() - interval {{Int32(duration, 40)}} second and now()
GROUP BY
date
)
CROSS JOIN (
SELECT
*
FROM
context_stats
)
ORDER BY date DESC
LIMIT 10000 OFFSET 1
NODE endpoint
SQL >
SELECT
if(arrayAll(x -> x == 'error', groupArray(anomaly_low)) == 1, 'error', 'ok') anomaly_low,
if(arrayAll(x -> x == 'error', groupArray(anomaly_high)) == 1, 'error', 'ok') anomaly_high,
now() as timestamp
FROM
anomalies
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment