Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save raven-rock/dadcea4dca1c57c889b09bf610dc3ddb to your computer and use it in GitHub Desktop.
Save raven-rock/dadcea4dca1c57c889b09bf610dc3ddb to your computer and use it in GitHub Desktop.
Example of how to update a temporal database table (aka Type 2 slowly changing dimension) from staged data using PostgreSQL
-- Adapted from my MySQL SCD2 version here: https://gist.github.com/raven-rock/ff715978ae75fdf67e42f764ed1566c3.
--
-- Meant to be a more general https://en.wikipedia.org/wiki/Temporal_database technique, not limited
-- to dimensional modeling's SCD2 use case.
-- Set up scd
drop table if exists scd_widget;
create temporary table scd_widget
(
rowid serial
, widget_code text
, widget_name text
, widget_color text
, vtime timestamp
, xtime timestamp default '9999-12-31 23:59:59'
, primary key (rowid)
, unique (widget_code, vtime)
)
;
insert into scd_widget
(
widget_code
, widget_name
, widget_color
, vtime
)
values
('A', 'Simply', 'red', now() - interval '2' day)
, ('B', 'Plexy', 'green', now() - interval '2' day)
, ('C', 'Basicy', 'blue', now() - interval '2' day)
, ('E', 'Lamey', 'sky', now() - interval '2' day)
, ('F', 'Awesomey', null, now() - interval '2' day)
, ('G', 'Sweety', 'violet', now() - interval '2' day)
;
select * from scd_widget;
-- Set up stage
create temporary table stage_widget
(
widget_code text
, widget_name text
, widget_color text
, vtime timestamp default now()
, primary key (widget_code, vtime)
)
;
insert into stage_widget
(
widget_code
, widget_name
, widget_color
, vtime
)
values
-- ('A', 'red') -- deleted
('B', 'Plexy', 'orange', now() - interval '1' day) -- changed
, ('C', 'Basicy', 'blue', now() - interval '1' day)
, ('D', 'Loosy', 'indigo', now() - interval '1' day) -- added
, ('E', 'NoSoLamey', 'sky', now() - interval '1' day)
, ('F', 'Awesomey', 'pink', now() - interval '1' day) -- set color from null to something
, ('G', 'Sweety', null, now() - interval '1' day) -- set color from something to null
;
select * from stage_widget;
-- Changes (excl new)
create temporary table stage_upserts
(
rowid int
, widget_code text
, widget_name text
, widget_color text
, vtime timestamp
, xtime timestamp default '9999-12-31 23:59:59'
, scd_action text
, primary key (widget_code, vtime)
)
;
-- close old records that have a new modified record
insert into stage_upserts
(
rowid
, widget_code
, widget_name
, widget_color
, vtime
, xtime
, scd_action
)
select
d.rowid
, d.widget_code
, d.widget_name
, d.widget_color
, d.vtime
, s.vtime as xtime
, 'close (modified)' as scd_action
from scd_widget d
join stage_widget s on s.widget_code = d.widget_code
where
( -- means it has been modified
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'')
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'')
)
;
select * from stage_upserts order by widget_code, vtime;
-- open new and modified records
insert into stage_upserts
(
widget_code
, widget_name
, widget_color
, vtime
, xtime
, scd_action
)
select
s.widget_code
, s.widget_name
, s.widget_color
, s.vtime as vtime
, infinity() as xtime
, case
when d.widget_code is null then 'open (new)'
else 'open (modified)'
end as scd_action
from stage_widget s
left join scd_widget d on d.widget_code = s.widget_code and d.xtime = infinity()
where
(
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'')
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'')
)
;
-- all the changes to apply
select * from stage_upserts order by widget_code;
-- initial state
select * from scd_widget order by widget_code;
-- apply closes via upsert
insert into scd_widget
(
rowid
, widget_code
, widget_name
, widget_color
, vtime
, xtime
)
select
rowid
, widget_code
, widget_name
, widget_color
, vtime
, xtime
from stage_upserts
where rowid is not null
on conflict (rowid)
do update set
widget_name = excluded.widget_name
, widget_color = excluded.widget_color
, vtime = excluded.vtime
, xtime = excluded.xtime
;
-- open
insert into scd_widget
(
widget_code
, widget_name
, widget_color
, vtime
, xtime
)
select
widget_code
, widget_name
, widget_color
, vtime
, xtime
from stage_upserts
where rowid is null
;
select * from stage_widget order by widget_code, vtime;
select * from scd_widget order by widget_code, vtime;
select * from scd_widget where now() - interval '2' day >= vtime and now() - interval '2' day < xtime order by widget_code, vtime;
select * from scd_widget where now() - interval '1' day >= vtime and now() - interval '1' day < xtime order by widget_code, vtime;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment