Skip to content

Instantly share code, notes, and snippets.

@noteflakes
Created May 3, 2018 06:48
Show Gist options
  • Save noteflakes/230dd56e1143f89faa5a847906c85c04 to your computer and use it in GitHub Desktop.
Save noteflakes/230dd56e1143f89faa5a847906c85c04 to your computer and use it in GitHub Desktop.
Calculate weighted average over irregular time series
create table samples (
stamp timestamptz,
series integer,
value float
);
insert into samples values
('2018-04-30 23:00:00+02', 1, 12.3),
('2018-05-01 01:45:00+02', 1, 22.2),
('2018-05-01 02:13:00+02', 1, 21.6),
('2018-05-01 02:26:00+02', 1, 14.9),
('2018-05-01 03:02:00+02', 1, 16.9);
with
t_values as (
select * from (
-- select last value prior to time range
(select
stamp,
to_timestamp(extract(epoch from stamp)::integer / 3600 * 3600)
as stamp_aligned,
value,
false as filled_in
from samples
where
series = 1 and
stamp < '2018-05-01 00:00:00+02'
order by
stamp desc
limit 1) union
-- select records from given time range
(select
stamp,
to_timestamp(extract(epoch from stamp)::integer / 3600 * 3600)
as stamp_aligned,
value,
false as filled_in
from samples
where
series = 1 and
stamp >= '2018-05-01 00:00:00+02' and
stamp <= '2018-05-01 04:00:00+02'
order by
stamp) union
-- select all regular periods for given time range
(select
stamp,
stamp as stamp_aligned,
null as value,
true as filled_in
from generate_series(
'2018-05-01 00:00:00+02',
'2018-05-01 04:00:00+02',
interval '3600 seconds'
) stamp)
) states
order by stamp
),
-- since records generated using generate_series do not contain values,
-- we need to copy the value from the last non-generated record.
t_with_filled_in_values as (
select *
from (
select
stamp,
stamp_aligned,
-- fill in value from last non-filled record (the first record having
-- the same filled_in_partition value)
(case when filled_in then
first_value(value) over (partition by filled_in_partition order by stamp)
else value end) as value
from (
select
stamp,
stamp_aligned,
value,
filled_in,
-- this field is incremented on every non-filled record
sum(case when filled_in then 0 else 1 end)
over (order by stamp) as filled_in_partition
from
t_values
) t_filled_in_partition
) t_filled_in_values
-- we wrap the filling-in query in order to remove any record before the
-- beginning of the given time range
where stamp >= '2018-05-01 00:00:00+02'
order by stamp
),
t_with_weight as (
select
stamp,
stamp_aligned,
value,
-- use window to get stamp from next record in order to calculate the
-- duration of the record which, divided by the period, gives us the
-- weight.
coalesce(extract(epoch from (lead(stamp) over (order by stamp) - stamp)),
3600)::float / 3600 as weight
from t_with_filled_in_values
order by stamp
)
select
stamp_aligned as stamp,
sum(value * weight) as avg
from t_with_weight
group by stamp_aligned
order by stamp_aligned;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment