Skip to content

Instantly share code, notes, and snippets.

@clarkbw
Last active March 8, 2022 20:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save clarkbw/2ed14ee9cb761e5ac00b99129d3fa7b5 to your computer and use it in GitHub Desktop.
Save clarkbw/2ed14ee9cb761e5ac00b99129d3fa7b5 to your computer and use it in GitHub Desktop.
linear regression (trend line) analysis of time series data in postgres SQL (demo)
-- bring all examples together and clean up with some top level variables
WITH vars AS (
SELECT INTERVAL '1 day' AS bucket,
(now() - INTERVAL '3 months') as time_from,
(now() + INTERVAL '1 month') as time_to
)
-- generate timeline into the past and future
,
timeline AS (
SELECT
time_bucket((SELECT bucket FROM vars), series) as time
FROM
generate_series((SELECT time_from FROM vars)::timestamp, (SELECT time_to FROM vars)::timestamp, (SELECT bucket FROM vars)) AS series
ORDER BY 1 ASC
)
-- rollup the daily usage
,
daily_usage AS (
SELECT
time_bucket((SELECT bucket FROM vars), time) as time,
SUM(usage) as usage
FROM meter
WHERE time >= (SELECT time_from FROM vars)
GROUP BY 1
)
-- calculate the slope + intercept for creating a trend
,
trend AS (
SELECT regr_slope(y, x) AS slope, regr_intercept(y, x) AS intercept
FROM (
SELECT
-- convert our time to an integer X value
EXTRACT(epoch FROM time) AS x,
usage AS y
FROM daily_usage
) x_y
)
-- add trendline to rollup data
SELECT *
, (EXTRACT(epoch FROM time) * (SELECT slope FROM trend) + (SELECT intercept FROM trend)) as trend_line
FROM timeline
LEFT JOIN daily_usage USING(time)
DROP TABLE IF EXISTS meter;
CREATE TABLE meter (
time TIMESTAMPTZ NOT NULL,
device_id INTEGER NOT NULL,
usage DOUBLE PRECISION
);
-- OPTIONAL: convert to TimescaleDB hypertable
SELECT create_hypertable('meter','time');
-- generate 6 months of hourly sample data for 4 devices
-- via https://www.timescale.com/blog/how-to-create-lots-of-sample-time-series-data-with-postgresql-generate_series/
INSERT INTO meter ( time, device_id, usage )
SELECT time, device_id, random()*100 as usage
FROM generate_series(
now() - INTERVAL '6 months',
now(),
INTERVAL '1 hour'
) as time,
generate_series(1,4) device_id;
-- add trendline to rollup data
SELECT *
, (EXTRACT(epoch FROM time) * (SELECT slope FROM trend) + (SELECT intercept FROM trend)) as trend_line
FROM timeline
LEFT JOIN daily_usage USING(time)
-- yields some sample data
SELECT DISTINCT ON (device_id) device_id, * FROM meter;
device_id | time | device_id | usage
-----------+-------------------------------+-----------+-------------------
1 | 2021-09-07 19:17:41.868351+00 | 1 | 93.51089160568797
2 | 2021-09-07 19:17:41.868351+00 | 2 | 31.95388355127733
3 | 2021-09-07 19:17:41.868351+00 | 3 | 92.83830527876908
4 | 2021-09-07 19:17:41.868351+00 | 4 | 91.94939261113504
-- generate timeline 3 months into the past and 1 month into the future
timeline AS (
SELECT
time_bucket('1 day', series) as time
FROM
generate_series((now() - INTERVAL '3 months')::timestamp, (now() + INTERVAL '1 month')::timestamp, '1 day') AS series
ORDER BY 1 ASC
)
-- calculate the slope + intercept for creating a trend
trend AS (
SELECT regr_slope(y, x) AS slope, regr_intercept(y, x) AS intercept
FROM (
SELECT
-- convert our time to an integer X value
EXTRACT(epoch FROM time) AS x,
usage AS y
FROM daily_usage
) x_y
)
-- rollup usage into daily buckets
daily_usage AS (
SELECT
time_bucket('1 day', time) as time,
SUM(usage) as usage
FROM meter
WHERE time > (now() - INTERVAL '3 months')
GROUP BY 1
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment