Last active
March 21, 2023 21:47
-
-
Save pgp44/79eb8b29647d31439495f9c77ba463f1 to your computer and use it in GitHub Desktop.
SQL implementation of Largest Triangle Three Bucket time series sampling algoritm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create type point_t as (x float, y float); | |
create type triangle_t as (p1 point_t, p2 point_t, p3 point_t, surface float); | |
create function largest_triangle_accum (maxsurfacetriangle triangle_t, p1 point_t, p2 point_t, p3 point_t) | |
returns triangle_t | |
language SQL | |
as $$ | |
select | |
case | |
when maxsurfacetriangle is null or triangle_surface(p1,p2,p3) > maxsurfacetriangle.surface | |
then (p1,p2,p3,triangle_surface(p1,p2,p3))::triangle_t | |
else maxsurfacetriangle | |
end | |
$$; | |
create function triangle_surface(p1 point_t,p2 point_t,p3 point_t) | |
returns float | |
language SQL | |
as $$ | |
select abs(p1.x * (p2.y - p3.y) + p2.x * (p3.y - p1.y) + p3.x * ( p1.y - p2.y)) / 2 | |
$$; | |
create aggregate largest_triangle (point_t,point_t,point_t) ( | |
stype = triangle_t, | |
sfunc = largest_triangle_accum | |
); | |
with recursive inputparams as ( | |
select 260 as BucketSize | |
), | |
timeseries as ( | |
select * from ( | |
select '2019-04-01 23:59:00'::timestamp, 3504 | |
union select '2019-04-01 23:58:00'::timestamp, 3488 | |
union select '2019-04-01 23:57:00'::timestamp, 3677 | |
union select '2019-04-01 23:56:00'::timestamp, 3526 | |
union select '2019-04-01 23:55:00'::timestamp, 2688 | |
union select '2019-04-01 23:54:00'::timestamp, 2257 | |
union select '2019-04-01 23:53:00'::timestamp, 1595 | |
union select '2019-04-01 23:52:00'::timestamp, 1654 | |
union select '2019-04-01 23:51:00'::timestamp, 2024 | |
union select '2019-04-01 23:50:00'::timestamp, 2584 | |
union select '2019-04-01 23:49:00'::timestamp, 3322 | |
union select '2019-04-01 23:48:00'::timestamp, 3715 | |
union select '2019-04-01 23:47:00'::timestamp, 3671 | |
union select '2019-04-01 23:46:00'::timestamp, 3571) | |
tab(timestamp,value) | |
), | |
-- First and last timestamp, value of the timeseries | |
tsrange as ( | |
select | |
(select (extract(epoch from timestamp), value)::point_t | |
from timeseries order by timestamp asc limit 1) as frst , | |
(select (extract(epoch from timestamp), value)::point_t | |
from timeseries order by timestamp desc limit 1) as lst | |
), | |
-- Add bucket number (grp column) for all but the last bucket | |
withgrptmp as ( | |
select 1 as grp, (tsr.frst::point_t).x, (tsr.frst::point_t).y from tsrange tsr | |
union | |
select | |
1 + dense_rank() over (order by i.BucketSize * cast(extract(epoch from timestamp) / i.BucketSize as int)) as grp | |
,extract(epoch from timeseries.timestamp) | |
,value as val | |
from timeseries, tsrange tsr, inputparams i | |
where timestamp > to_timestamp((tsr.frst::point_t).x) at time zone 'utc' and timestamp < to_timestamp((tsr.lst::point_t).x) at time zone 'utc' | |
), | |
-- Add bucket number (grp column) | |
withgrp as ( | |
select * from withgrptmp | |
union | |
select 1 + (select max(grp) from withgrptmp) as grp, (tsr.lst::point_t).x, (tsr.lst::point_t).y from tsrange tsr | |
), | |
-- Average per bucket | |
withgrpavgtmp as ( | |
select grp,avg(x) as xavg, avg(y) as yavg from withgrp group by grp | |
), | |
-- Join time series timestamp, value with average values of next bucket | |
withgrpavg as ( | |
select withgrp.grp as grp,withgrp.x,withgrp.y, withgrpavgtmp.xavg as xavg3,withgrpavgtmp.yavg as yavg3 | |
from withgrp left outer join withgrpavgtmp on withgrp.grp=withgrpavgtmp.grp-1 | |
), | |
largesttriangle(grp,p) as ( | |
select | |
wga.grp, | |
((0,0)::point_t,(wga.x,wga.y)::point_t,(0,0)::point_t,0.0::float)::triangle_t | |
from withgrpavg wga | |
where grp = 1 | |
union all | |
select distinct | |
wga.grp, | |
(largest_triangle( | |
(ltt.p).p2::point_t, -- the selected point of the previous bucket | |
(wga.x,wga.y)::point_t, -- current bucket | |
(wga.xavg3,wga.yavg3)::point_t -- average of next bucket | |
) over (partition by wga.grp))::triangle_t | |
from withgrpavg wga join largesttriangle ltt on wga.grp=ltt.grp+1 | |
where wga.grp > 1 | |
) | |
select to_timestamp(((t.p).p2::point_t).x) at time zone 'utc',((t.p).p2::point_t).y | |
from largesttriangle t where ((t.p).p2::point_t).y != 0 | |
order by 1 | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment