Created
May 3, 2018 06:48
-
-
Save noteflakes/230dd56e1143f89faa5a847906c85c04 to your computer and use it in GitHub Desktop.
Calculate weighted average over irregular time series
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create table samples ( | |
stamp timestamptz, | |
series integer, | |
value float | |
); | |
insert into samples values | |
('2018-04-30 23:00:00+02', 1, 12.3), | |
('2018-05-01 01:45:00+02', 1, 22.2), | |
('2018-05-01 02:13:00+02', 1, 21.6), | |
('2018-05-01 02:26:00+02', 1, 14.9), | |
('2018-05-01 03:02:00+02', 1, 16.9); | |
with | |
t_values as ( | |
select * from ( | |
-- select last value prior to time range | |
(select | |
stamp, | |
to_timestamp(extract(epoch from stamp)::integer / 3600 * 3600) | |
as stamp_aligned, | |
value, | |
false as filled_in | |
from samples | |
where | |
series = 1 and | |
stamp < '2018-05-01 00:00:00+02' | |
order by | |
stamp desc | |
limit 1) union | |
-- select records from given time range | |
(select | |
stamp, | |
to_timestamp(extract(epoch from stamp)::integer / 3600 * 3600) | |
as stamp_aligned, | |
value, | |
false as filled_in | |
from samples | |
where | |
series = 1 and | |
stamp >= '2018-05-01 00:00:00+02' and | |
stamp <= '2018-05-01 04:00:00+02' | |
order by | |
stamp) union | |
-- select all regular periods for given time range | |
(select | |
stamp, | |
stamp as stamp_aligned, | |
null as value, | |
true as filled_in | |
from generate_series( | |
'2018-05-01 00:00:00+02', | |
'2018-05-01 04:00:00+02', | |
interval '3600 seconds' | |
) stamp) | |
) states | |
order by stamp | |
), | |
-- since records generated using generate_series do not contain values, | |
-- we need to copy the value from the last non-generated record. | |
t_with_filled_in_values as ( | |
select * | |
from ( | |
select | |
stamp, | |
stamp_aligned, | |
-- fill in value from last non-filled record (the first record having | |
-- the same filled_in_partition value) | |
(case when filled_in then | |
first_value(value) over (partition by filled_in_partition order by stamp) | |
else value end) as value | |
from ( | |
select | |
stamp, | |
stamp_aligned, | |
value, | |
filled_in, | |
-- this field is incremented on every non-filled record | |
sum(case when filled_in then 0 else 1 end) | |
over (order by stamp) as filled_in_partition | |
from | |
t_values | |
) t_filled_in_partition | |
) t_filled_in_values | |
-- we wrap the filling-in query in order to remove any record before the | |
-- beginning of the given time range | |
where stamp >= '2018-05-01 00:00:00+02' | |
order by stamp | |
), | |
t_with_weight as ( | |
select | |
stamp, | |
stamp_aligned, | |
value, | |
-- use window to get stamp from next record in order to calculate the | |
-- duration of the record which, divided by the period, gives us the | |
-- weight. | |
coalesce(extract(epoch from (lead(stamp) over (order by stamp) - stamp)), | |
3600)::float / 3600 as weight | |
from t_with_filled_in_values | |
order by stamp | |
) | |
select | |
stamp_aligned as stamp, | |
sum(value * weight) as avg | |
from t_with_weight | |
group by stamp_aligned | |
order by stamp_aligned; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment