SQL implementation of Largest Triangle Three Bucket time series sampling algoritm
create type point_t as (x float, y float); | |
create type triangle_t as (p1 point_t, p2 point_t, p3 point_t, surface float); | |
create function largest_triangle_accum (maxsurfacetriangle triangle_t, p1 point_t, p2 point_t, p3 point_t) | |
returns triangle_t | |
language SQL | |
as $$ | |
select | |
case | |
when maxsurfacetriangle is null or triangle_surface(p1,p2,p3) > maxsurfacetriangle.surface | |
then (p1,p2,p3,triangle_surface(p1,p2,p3))::triangle_t | |
else maxsurfacetriangle | |
end | |
$$; | |
create function triangle_surface(p1 point_t,p2 point_t,p3 point_t) | |
returns float | |
language SQL | |
as $$ | |
select abs(p1.x * (p2.y - p3.y) + p2.x * (p3.y - p1.y) + p3.x * ( p1.y - p2.y)) / 2 | |
$$; | |
create aggregate largest_triangle (point_t,point_t,point_t) ( | |
stype = triangle_t, | |
sfunc = largest_triangle_accum | |
); | |
with recursive inputparams as ( | |
select 260 as BucketSize | |
), | |
timeseries as ( | |
select * from ( | |
select '2019-04-01 23:59:00'::timestamp, 3504 | |
union select '2019-04-01 23:58:00'::timestamp, 3488 | |
union select '2019-04-01 23:57:00'::timestamp, 3677 | |
union select '2019-04-01 23:56:00'::timestamp, 3526 | |
union select '2019-04-01 23:55:00'::timestamp, 2688 | |
union select '2019-04-01 23:54:00'::timestamp, 2257 | |
union select '2019-04-01 23:53:00'::timestamp, 1595 | |
union select '2019-04-01 23:52:00'::timestamp, 1654 | |
union select '2019-04-01 23:51:00'::timestamp, 2024 | |
union select '2019-04-01 23:50:00'::timestamp, 2584 | |
union select '2019-04-01 23:49:00'::timestamp, 3322 | |
union select '2019-04-01 23:48:00'::timestamp, 3715 | |
union select '2019-04-01 23:47:00'::timestamp, 3671 | |
union select '2019-04-01 23:46:00'::timestamp, 3571) | |
tab(timestamp,value) | |
), | |
-- First and last timestamp, value of the timeseries | |
tsrange as ( | |
select | |
(select (extract(epoch from timestamp), value)::point_t | |
from timeseries order by timestamp asc limit 1) as frst , | |
(select (extract(epoch from timestamp), value)::point_t | |
from timeseries order by timestamp desc limit 1) as lst | |
), | |
-- Add bucket number (grp column) for all but the last bucket | |
withgrptmp as ( | |
select 1 as grp, (tsr.frst::point_t).x, (tsr.frst::point_t).y from tsrange tsr | |
union | |
select | |
1 + dense_rank() over (order by i.BucketSize * cast(extract(epoch from timestamp) / i.BucketSize as int)) as grp | |
,extract(epoch from timeseries.timestamp) | |
,value as val | |
from timeseries, tsrange tsr, inputparams i | |
where timestamp > to_timestamp((tsr.frst::point_t).x) at time zone 'utc' and timestamp < to_timestamp((tsr.lst::point_t).x) at time zone 'utc' | |
), | |
-- Add bucket number (grp column) | |
withgrp as ( | |
select * from withgrptmp | |
union | |
select 1 + (select max(grp) from withgrptmp) as grp, (tsr.lst::point_t).x, (tsr.lst::point_t).y from tsrange tsr | |
), | |
-- Average per bucket | |
withgrpavgtmp as ( | |
select grp,avg(x) as xavg, avg(y) as yavg from withgrp group by grp | |
), | |
-- Join time series timestamp, value with average values of next bucket | |
withgrpavg as ( | |
select withgrp.grp as grp,withgrp.x,withgrp.y, withgrpavgtmp.xavg as xavg3,withgrpavgtmp.yavg as yavg3 | |
from withgrp left outer join withgrpavgtmp on withgrp.grp=withgrpavgtmp.grp-1 | |
), | |
largesttriangle(grp,p) as ( | |
select | |
wga.grp, | |
((0,0)::point_t,(wga.x,wga.y)::point_t,(0,0)::point_t,0.0::float)::triangle_t | |
from withgrpavg wga | |
where grp = 1 | |
union all | |
select distinct | |
wga.grp, | |
(largest_triangle( | |
(ltt.p).p2::point_t, -- the selected point of the previous bucket | |
(wga.x,wga.y)::point_t, -- current bucket | |
(wga.xavg3,wga.yavg3)::point_t -- average of next bucket | |
) over (partition by wga.grp))::triangle_t | |
from withgrpavg wga join largesttriangle ltt on wga.grp=ltt.grp+1 | |
where wga.grp > 1 | |
) | |
select to_timestamp(((t.p).p2::point_t).x) at time zone 'utc',((t.p).p2::point_t).y | |
from largesttriangle t where ((t.p).p2::point_t).y != 0 | |
order by 1 | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment