Last active
July 2, 2024 14:38
-
-
Save clarkbw/2ed14ee9cb761e5ac00b99129d3fa7b5 to your computer and use it in GitHub Desktop.
linear regression (trend line) analysis of time series data in postgres SQL (demo)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- bring all examples together and clean up with some top level variables | |
WITH vars AS ( | |
SELECT INTERVAL '1 day' AS bucket, | |
(now() - INTERVAL '3 months') as time_from, | |
(now() + INTERVAL '1 month') as time_to | |
) | |
-- generate timeline into the past and future | |
, | |
timeline AS ( | |
SELECT | |
time_bucket((SELECT bucket FROM vars), series) as time | |
FROM | |
generate_series((SELECT time_from FROM vars)::timestamp, (SELECT time_to FROM vars)::timestamp, (SELECT bucket FROM vars)) AS series | |
ORDER BY 1 ASC | |
) | |
-- rollup the daily usage | |
, | |
daily_usage AS ( | |
SELECT | |
time_bucket((SELECT bucket FROM vars), time) as time, | |
SUM(usage) as usage | |
FROM meter | |
WHERE time >= (SELECT time_from FROM vars) | |
GROUP BY 1 | |
) | |
-- calculate the slope + intercept for creating a trend | |
, | |
trend AS ( | |
SELECT regr_slope(y, x) AS slope, regr_intercept(y, x) AS intercept | |
FROM ( | |
SELECT | |
-- convert our time to an integer X value | |
EXTRACT(epoch FROM time) AS x, | |
usage AS y | |
FROM daily_usage | |
) x_y | |
) | |
-- add trendline to rollup data | |
SELECT * | |
, (EXTRACT(epoch FROM time) * (SELECT slope FROM trend) + (SELECT intercept FROM trend)) as trend_line | |
FROM timeline | |
LEFT JOIN daily_usage USING(time) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE IF EXISTS meter; | |
CREATE TABLE meter ( | |
time TIMESTAMPTZ NOT NULL, | |
device_id INTEGER NOT NULL, | |
usage DOUBLE PRECISION | |
); | |
-- OPTIONAL: convert to TimescaleDB hypertable | |
SELECT create_hypertable('meter','time'); | |
-- generate 6 months of hourly sample data for 4 devices | |
-- via https://www.timescale.com/blog/how-to-create-lots-of-sample-time-series-data-with-postgresql-generate_series/ | |
INSERT INTO meter ( time, device_id, usage ) | |
SELECT time, device_id, random()*100 as usage | |
FROM generate_series( | |
now() - INTERVAL '6 months', | |
now(), | |
INTERVAL '1 hour' | |
) as time, | |
generate_series(1,4) device_id; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- add trendline to rollup data | |
SELECT * | |
, (EXTRACT(epoch FROM time) * (SELECT slope FROM trend) + (SELECT intercept FROM trend)) as trend_line | |
FROM timeline | |
LEFT JOIN daily_usage USING(time) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- yields some sample data | |
SELECT DISTINCT ON (device_id) device_id, * FROM meter; | |
device_id | time | device_id | usage | |
-----------+-------------------------------+-----------+------------------- | |
1 | 2021-09-07 19:17:41.868351+00 | 1 | 93.51089160568797 | |
2 | 2021-09-07 19:17:41.868351+00 | 2 | 31.95388355127733 | |
3 | 2021-09-07 19:17:41.868351+00 | 3 | 92.83830527876908 | |
4 | 2021-09-07 19:17:41.868351+00 | 4 | 91.94939261113504 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- generate timeline 3 months into the past and 1 month into the future | |
timeline AS ( | |
SELECT | |
time_bucket('1 day', series) as time | |
FROM | |
generate_series((now() - INTERVAL '3 months')::timestamp, (now() + INTERVAL '1 month')::timestamp, '1 day') AS series | |
ORDER BY 1 ASC | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- calculate the slope + intercept for creating a trend | |
trend AS ( | |
SELECT regr_slope(y, x) AS slope, regr_intercept(y, x) AS intercept | |
FROM ( | |
SELECT | |
-- convert our time to an integer X value | |
EXTRACT(epoch FROM time) AS x, | |
usage AS y | |
FROM daily_usage | |
) x_y | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- rollup usage into daily buckets | |
daily_usage AS ( | |
SELECT | |
time_bucket('1 day', time) as time, | |
SUM(usage) as usage | |
FROM meter | |
WHERE time > (now() - INTERVAL '3 months') | |
GROUP BY 1 | |
) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment