Skip to content

Instantly share code, notes, and snippets.



Last active Jan 14, 2021
What would you like to do?
SQL implementation of Largest Triangle Three Bucket time series sampling algoritm
create type point_t as (x float, y float);
create type triangle_t as (p1 point_t, p2 point_t, p3 point_t, surface float);
create function largest_triangle_accum (maxsurfacetriangle triangle_t, p1 point_t, p2 point_t, p3 point_t)
returns triangle_t
language SQL
as $$
when maxsurfacetriangle is null or triangle_surface(p1,p2,p3) > maxsurfacetriangle.surface
then (p1,p2,p3,triangle_surface(p1,p2,p3))::triangle_t
else maxsurfacetriangle
create function triangle_surface(p1 point_t,p2 point_t,p3 point_t)
returns float
language SQL
as $$
select abs(p1.x * (p2.y - p3.y) + p2.x * (p3.y - p1.y) + p3.x * ( p1.y - p2.y)) / 2
create aggregate largest_triangle (point_t,point_t,point_t) (
stype = triangle_t,
sfunc = largest_triangle_accum
with recursive inputparams as (
select 260 as BucketSize
timeseries as (
select * from (
select '2019-04-01 23:59:00'::timestamp, 3504
union select '2019-04-01 23:58:00'::timestamp, 3488
union select '2019-04-01 23:57:00'::timestamp, 3677
union select '2019-04-01 23:56:00'::timestamp, 3526
union select '2019-04-01 23:55:00'::timestamp, 2688
union select '2019-04-01 23:54:00'::timestamp, 2257
union select '2019-04-01 23:53:00'::timestamp, 1595
union select '2019-04-01 23:52:00'::timestamp, 1654
union select '2019-04-01 23:51:00'::timestamp, 2024
union select '2019-04-01 23:50:00'::timestamp, 2584
union select '2019-04-01 23:49:00'::timestamp, 3322
union select '2019-04-01 23:48:00'::timestamp, 3715
union select '2019-04-01 23:47:00'::timestamp, 3671
union select '2019-04-01 23:46:00'::timestamp, 3571)
-- First and last timestamp, value of the timeseries
tsrange as (
(select (extract(epoch from timestamp), value)::point_t
from timeseries order by timestamp asc limit 1) as frst ,
(select (extract(epoch from timestamp), value)::point_t
from timeseries order by timestamp desc limit 1) as lst
-- Add bucket number (grp column) for all but the last bucket
withgrptmp as (
select 1 as grp, (tsr.frst::point_t).x, (tsr.frst::point_t).y from tsrange tsr
1 + dense_rank() over (order by i.BucketSize * cast(extract(epoch from timestamp) / i.BucketSize as int)) as grp
,extract(epoch from timeseries.timestamp)
,value as val
from timeseries, tsrange tsr, inputparams i
where timestamp > to_timestamp((tsr.frst::point_t).x) at time zone 'utc' and timestamp < to_timestamp((tsr.lst::point_t).x) at time zone 'utc'
-- Add bucket number (grp column)
withgrp as (
select * from withgrptmp
select 1 + (select max(grp) from withgrptmp) as grp, (tsr.lst::point_t).x, (tsr.lst::point_t).y from tsrange tsr
-- Average per bucket
withgrpavgtmp as (
select grp,avg(x) as xavg, avg(y) as yavg from withgrp group by grp
-- Join time series timestamp, value with average values of next bucket
withgrpavg as (
select withgrp.grp as grp,withgrp.x,withgrp.y, withgrpavgtmp.xavg as xavg3,withgrpavgtmp.yavg as yavg3
from withgrp left outer join withgrpavgtmp on withgrp.grp=withgrpavgtmp.grp-1
largesttriangle(grp,p) as (
from withgrpavg wga
where grp = 1
union all
select distinct
(ltt.p).p2::point_t, -- the selected point of the previous bucket
(wga.x,wga.y)::point_t, -- current bucket
(wga.xavg3,wga.yavg3)::point_t -- average of next bucket
) over (partition by wga.grp))::triangle_t
from withgrpavg wga join largesttriangle ltt on wga.grp=ltt.grp+1
where wga.grp > 1
select to_timestamp(((t.p).p2::point_t).x) at time zone 'utc',((t.p).p2::point_t).y
from largesttriangle t where ((t.p).p2::point_t).y != 0
order by 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment