Skip to content

Instantly share code, notes, and snippets.

@pgp44 pgp44/lttp.sql
Last active Apr 13, 2019

Embed
What would you like to do?
SQL implementation of Largest Triangle Three Bucket time series sampling algoritm
create type point_t as (x float, y float);
create type triangle_t as (p1 point_t, p2 point_t, p3 point_t, surface float);
create function largest_triangle_accum (maxsurfacetriangle triangle_t, p1 point_t, p2 point_t, p3 point_t)
returns triangle_t
language SQL
as $$
select
case
when maxsurfacetriangle is null or triangle_surface(p1,p2,p3) > maxsurfacetriangle.surface
then (p1,p2,p3,triangle_surface(p1,p2,p3))::triangle_t
else maxsurfacetriangle
end
$$;
create function triangle_surface(p1 point_t,p2 point_t,p3 point_t)
returns float
language SQL
as $$
select abs(p1.x * (p2.y - p3.y) + p2.x * (p3.y - p1.y) + p3.x * ( p1.y - p2.y)) / 2
$$;
create aggregate largest_triangle (point_t,point_t,point_t) (
stype = triangle_t,
sfunc = largest_triangle_accum
);
with recursive inputparams as (
select 260 as BucketSize
),
timeseries as (
select * from (
select '2019-04-01 23:59:00'::timestamp, 3504
union select '2019-04-01 23:58:00'::timestamp, 3488
union select '2019-04-01 23:57:00'::timestamp, 3677
union select '2019-04-01 23:56:00'::timestamp, 3526
union select '2019-04-01 23:55:00'::timestamp, 2688
union select '2019-04-01 23:54:00'::timestamp, 2257
union select '2019-04-01 23:53:00'::timestamp, 1595
union select '2019-04-01 23:52:00'::timestamp, 1654
union select '2019-04-01 23:51:00'::timestamp, 2024
union select '2019-04-01 23:50:00'::timestamp, 2584
union select '2019-04-01 23:49:00'::timestamp, 3322
union select '2019-04-01 23:48:00'::timestamp, 3715
union select '2019-04-01 23:47:00'::timestamp, 3671
union select '2019-04-01 23:46:00'::timestamp, 3571)
tab(timestamp,value)
),
-- First and last timestamp, value of the timeseries
tsrange as (
select
(select (extract(epoch from timestamp), value)::point_t
from timeseries order by timestamp asc limit 1) as frst ,
(select (extract(epoch from timestamp), value)::point_t
from timeseries order by timestamp desc limit 1) as lst
),
-- Add bucket number (grp column) for all but the last bucket
withgrptmp as (
select 1 as grp, (tsr.frst::point_t).x, (tsr.frst::point_t).y from tsrange tsr
union
select
1 + dense_rank() over (order by i.BucketSize * cast(extract(epoch from timestamp) / i.BucketSize as int)) as grp
,extract(epoch from timeseries.timestamp)
,value as val
from timeseries, tsrange tsr, inputparams i
where timestamp > to_timestamp((tsr.frst::point_t).x) at time zone 'utc' and timestamp < to_timestamp((tsr.lst::point_t).x) at time zone 'utc'
),
-- Add bucket number (grp column)
withgrp as (
select * from withgrptmp
union
select 1 + (select max(grp) from withgrptmp) as grp, (tsr.lst::point_t).x, (tsr.lst::point_t).y from tsrange tsr
),
-- Average per bucket
withgrpavgtmp as (
select grp,avg(x) as xavg, avg(y) as yavg from withgrp group by grp
),
-- Join time series timestamp, value with average values of next bucket
withgrpavg as (
select withgrp.grp as grp,withgrp.x,withgrp.y, withgrpavgtmp.xavg as xavg3,withgrpavgtmp.yavg as yavg3
from withgrp left outer join withgrpavgtmp on withgrp.grp=withgrpavgtmp.grp-1
),
largesttriangle(grp,p) as (
select
wga.grp,
((0,0)::point_t,(wga.x,wga.y)::point_t,(0,0)::point_t,0.0::float)::triangle_t
from withgrpavg wga
where grp = 1
union all
select distinct
wga.grp,
(largest_triangle(
(ltt.p).p2::point_t, -- the selected point of the previous bucket
(wga.x,wga.y)::point_t, -- current bucket
(wga.xavg3,wga.yavg3)::point_t -- average of next bucket
) over (partition by wga.grp))::triangle_t
from withgrpavg wga join largesttriangle ltt on wga.grp=ltt.grp+1
where wga.grp > 1
)
select to_timestamp(((t.p).p2::point_t).x) at time zone 'utc',((t.p).p2::point_t).y
from largesttriangle t where ((t.p).p2::point_t).y != 0
order by 1
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.