Skip to content

Instantly share code, notes, and snippets.

@Skitionek
Last active October 3, 2019 12:48
Show Gist options
  • Save Skitionek/6042ba19d525dc41b839e939e085a23f to your computer and use it in GitHub Desktop.
Save Skitionek/6042ba19d525dc41b839e939e085a23f to your computer and use it in GitHub Desktop.
Linear interpolation of missing data points (sparse dataset)
-- 60 * 60 * 24 = 86400 // number of seconds in day
WITH range AS (SELECT *,
EXTRACT(EPOCH FROM min) min_e,
EXTRACT(EPOCH FROM max) max_e
FROM
(SELECT min(date) AS min, max(date) AS max FROM prices) s)
SELECT
CASE WHEN prev_d <> min_e AND date <> g
THEN ROUND((m * g + ((price + prev_p) - m * (date + prev_d)) / 2)::NUMERIC, 2)
ELSE price END price,
date <> g interpolated,
TIMESTAMP WITHOUT TIME ZONE 'epoch' + g * INTERVAL '1 second' date
FROM
(SELECT *,
generate_series(prev_d + 86400, date, 86400) g
FROM
(SELECT *,
(price - prev_p) / (date - prev_d) m
FROM
(SELECT
id,
range.min_e,
range.max_e,
EXTRACT(EPOCH FROM
(lag(date, 1, range.min) OVER (PARTITION BY entity_id ORDER BY date)))::BIGINT prev_d,
EXTRACT(EPOCH FROM date)::BIGINT date,
(lag(price, 1, price) OVER (PARTITION BY entity_id ORDER BY date)) prev_p,
price
FROM
prices,
range) a
where date <> prev_d) b) c;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment