Skip to content

Instantly share code, notes, and snippets.

@raven-rock
Last active March 26, 2020 02:00
Show Gist options
  • Save raven-rock/ff715978ae75fdf67e42f764ed1566c3 to your computer and use it in GitHub Desktop.
Save raven-rock/ff715978ae75fdf67e42f764ed1566c3 to your computer and use it in GitHub Desktop.
Example of how to update a Type 2 slowly changing dimension table from staged data using MySQL
/*
In this example,
- rowid is simply a rowid for the scd. Perhaps to avoid confusion we actually always name this "rowid".
- widget_code is the durable natural key (could be a composite set of columns)
- widget_name is the change-tracked attribute (could be set of columns)
- widget_color is another the change-tracked attribute (could be set of columns)
*/
set @infinity = '9999-12-31 23:59:59';
-- Set up scd
create temporary table scd_widget
(
rowid int unsigned auto_increment
, widget_code varchar(20)
, widget_name varchar(20)
, widget_color varchar(20)
, is_current bool not null default true
, valid_from datetime
, valid_to datetime default '9999-12-31 23:59:59'
, primary key (rowid)
, index (
widget_code
, widget_name
, widget_color
, valid_from
, valid_to
)
)
collate latin1_swedish_ci
;
insert into scd_widget
(
widget_code
, widget_name
, widget_color
, valid_from
)
values
('A', 'Simply', 'red', now() - interval 2 day)
, ('B', 'Plexy', 'green', now() - interval 2 day)
, ('C', 'Basicy', 'blue', now() - interval 2 day)
, ('E', 'Lamey', 'sky', now() - interval 2 day)
, ('F', 'Awesomey', null, now() - interval 2 day)
, ('G', 'Sweety', 'violet', now() - interval 2 day)
;
-- select * from scd_widget;
-- Set up stage
create temporary table stage_widget
(
widget_code varchar(20)
, widget_name varchar(20)
, widget_color varchar(20)
, primary key (widget_code)
)
collate latin1_swedish_ci
;
insert into stage_widget
(
widget_code
, widget_name
, widget_color
)
values
-- ('A', 'red') -- deleted
('B', 'Plexy', 'orange') -- changed
, ('C', 'Basicy', 'blue')
, ('D', 'Loosy', 'indigo') -- added
, ('E', 'NoSoLamey', 'sky')
, ('F', 'Awesomey', 'pink') -- set color from null to something
, ('G', 'Sweety', null) -- set color from something to null
;
-- select * from stage_widget;
-- Changes (excl new)
set @tx_datetime = now(); -- tranaction datetime stamp for setting valid_from/to to same moment in time
create temporary table stage_upserts
(
rowid int unsigned default null
, widget_code varchar(20)
, widget_name varchar(20)
, widget_color varchar(20)
, is_current bool not null default true
, valid_from datetime
, valid_to datetime default '9999-12-31 23:59:59'
, scd_action varchar(25)
)
collate latin1_swedish_ci
;
-- close deleted records and old records that have a new modified record
insert into stage_upserts
select
d.rowid
, d.widget_code
, d.widget_name
, d.widget_color
, false as is_current
, d.valid_from
, @tx_datetime as valid_to
-- , d.widget_color as x_old_widget_color
, case
when s.widget_code is null then 'close (deleted)'
else 'close (modified)'
end as scd_action
from scd_widget d
left join stage_widget s on s.widget_code = d.widget_code
where
(
s.widget_code is null -- means it has been "deleted"
or
( -- means it has been modified
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'')
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'')
)
)
;
-- open new and modified records
insert into stage_upserts
select
null as rowid
, s.widget_code
, s.widget_name
, s.widget_color
, true as is_current
, @tx_datetime as valid_from
, @infinity as valid_to
, case
when d.widget_code is null then 'open (new)'
else 'open (modified)'
end as scd_action
from stage_widget s
left join scd_widget d on d.widget_code = s.widget_code and d.is_current = true
where
(
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'')
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'')
)
;
-- all the changes to apply
select * from stage_upserts ;
-- initial state
select * from scd_widget;
-- apply all changes via upsert
insert into scd_widget
(
rowid
, widget_code
, widget_name
, widget_color
, is_current
, valid_from
, valid_to
)
select
rowid
, widget_code
, widget_name
, widget_color
, is_current
, valid_from
, valid_to
from stage_upserts
on duplicate key update
widget_name = values(widget_name)
, widget_color = values(widget_color)
, is_current = values(is_current)
, valid_from = values(valid_from)
, valid_to = values(valid_to)
;
-- final state
select * from scd_widget order by widget_code;
-- is_current=true dimension
select * from scd_widget where is_current = true order by widget_code;
-- yesterday view of dimension
set @yesterday = now() - interval 1 day;
select @yesterday;
select * from scd_widget where @yesterday >= valid_from and @yesterday < valid_to order by widget_code;
--------------
set @infinity = '9999-12-31 23:59:59'
--------------
Query OK, 0 rows affected (0.16 sec)
--------------
create temporary table scd_widget
(
rowid int unsigned auto_increment
, widget_code varchar(20)
, widget_name varchar(20)
, widget_color varchar(20)
, is_current bool not null default true
, valid_from datetime
, valid_to datetime default '9999-12-31 23:59:59'
, primary key (rowid)
, index (
widget_code
, widget_name
, widget_color
, valid_from
, valid_to
)
)
collate latin1_swedish_ci
--------------
Query OK, 0 rows affected (0.14 sec)
--------------
insert into scd_widget
(
widget_code
, widget_name
, widget_color
, valid_from
)
values
('A', 'Simply', 'red', now() - interval 2 day)
, ('B', 'Plexy', 'green', now() - interval 2 day)
, ('C', 'Basicy', 'blue', now() - interval 2 day)
, ('E', 'Lamey', 'sky', now() - interval 2 day)
, ('F', 'Awesomey', null, now() - interval 2 day)
, ('G', 'Sweety', 'violet', now() - interval 2 day)
--------------
Query OK, 6 rows affected (0.15 sec)
Records: 6 Duplicates: 0 Warnings: 0
--------------
create temporary table stage_widget
(
widget_code varchar(20)
, widget_name varchar(20)
, widget_color varchar(20)
, primary key (widget_code)
)
collate latin1_swedish_ci
--------------
Query OK, 0 rows affected (0.14 sec)
--------------
insert into stage_widget
(
widget_code
, widget_name
, widget_color
)
values
('B', 'Plexy', 'orange')
, ('C', 'Basicy', 'blue')
, ('D', 'Loosy', 'indigo')
, ('E', 'NoSoLamey', 'sky')
, ('F', 'Awesomey', 'pink')
, ('G', 'Sweety', null)
--------------
Query OK, 6 rows affected (0.14 sec)
Records: 6 Duplicates: 0 Warnings: 0
--------------
set @tx_datetime = now()
--------------
Query OK, 0 rows affected (0.14 sec)
--------------
create temporary table stage_upserts
(
rowid int unsigned default null
, widget_code varchar(20)
, widget_name varchar(20)
, widget_color varchar(20)
, is_current bool not null default true
, valid_from datetime
, valid_to datetime default '9999-12-31 23:59:59'
, scd_action varchar(25)
)
collate latin1_swedish_ci
--------------
Query OK, 0 rows affected (0.15 sec)
--------------
insert into stage_upserts
select
d.rowid
, d.widget_code
, d.widget_name
, d.widget_color
, false as is_current
, d.valid_from
, @tx_datetime as valid_to
, case
when s.widget_code is null then 'close (deleted)'
else 'close (modified)'
end as scd_action
from scd_widget d
left join stage_widget s on s.widget_code = d.widget_code
where
(
s.widget_code is null
or
(
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'')
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'')
)
)
--------------
Query OK, 5 rows affected (0.14 sec)
Records: 5 Duplicates: 0 Warnings: 0
--------------
insert into stage_upserts
select
null as rowid
, s.widget_code
, s.widget_name
, s.widget_color
, true as is_current
, @tx_datetime as valid_from
, @infinity as valid_to
, case
when d.widget_code is null then 'open (new)'
else 'open (modified)'
end as scd_action
from stage_widget s
left join scd_widget d on d.widget_code = s.widget_code and d.is_current = true
where
(
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'')
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'')
)
--------------
Query OK, 5 rows affected (0.15 sec)
Records: 5 Duplicates: 0 Warnings: 0
--------------
select * from stage_upserts
--------------
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+------------------+
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to | scd_action |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+------------------+
| 1 | A | Simply | red | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (deleted) |
| 2 | B | Plexy | green | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) |
| 4 | E | Lamey | sky | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) |
| 5 | F | Awesomey | NULL | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) |
| 6 | G | Sweety | violet | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) |
| NULL | B | Plexy | orange | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) |
| NULL | E | NoSoLamey | sky | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) |
| NULL | F | Awesomey | pink | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) |
| NULL | G | Sweety | NULL | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) |
| NULL | D | Loosy | indigo | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (new) |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+------------------+
10 rows in set (0.14 sec)
--------------
select * from scd_widget
--------------
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| 1 | A | Simply | red | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 2 | B | Plexy | green | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 4 | E | Lamey | sky | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 5 | F | Awesomey | NULL | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 6 | G | Sweety | violet | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
6 rows in set (0.15 sec)
--------------
insert into scd_widget
(
rowid
, widget_code
, widget_name
, widget_color
, is_current
, valid_from
, valid_to
)
select
rowid
, widget_code
, widget_name
, widget_color
, is_current
, valid_from
, valid_to
from stage_upserts
on duplicate key update
widget_name = values(widget_name)
, widget_color = values(widget_color)
, is_current = values(is_current)
, valid_from = values(valid_from)
, valid_to = values(valid_to)
--------------
Query OK, 15 rows affected (0.15 sec)
Records: 10 Duplicates: 5 Warnings: 0
--------------
select * from scd_widget order by widget_code
--------------
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| 1 | A | Simply | red | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 2 | B | Plexy | green | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 7 | B | Plexy | orange | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 11 | D | Loosy | indigo | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 4 | E | Lamey | sky | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 8 | E | NoSoLamey | sky | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 5 | F | Awesomey | NULL | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 9 | F | Awesomey | pink | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 10 | G | Sweety | NULL | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 6 | G | Sweety | violet | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
11 rows in set (0.15 sec)
--------------
select * from scd_widget where is_current = true order by widget_code
--------------
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| 7 | B | Plexy | orange | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 11 | D | Loosy | indigo | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 8 | E | NoSoLamey | sky | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 9 | F | Awesomey | pink | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
| 10 | G | Sweety | NULL | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
6 rows in set (0.17 sec)
--------------
set @yesterday = now() - interval 1 day
--------------
Query OK, 0 rows affected (0.14 sec)
--------------
select @yesterday
--------------
+---------------------+
| @yesterday |
+---------------------+
| 2020-03-24 18:57:57 |
+---------------------+
1 row in set (0.15 sec)
--------------
select * from scd_widget where @yesterday >= valid_from and @yesterday < valid_to order by widget_code
--------------
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
| 1 | A | Simply | red | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 2 | B | Plexy | green | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 |
| 4 | E | Lamey | sky | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 5 | F | Awesomey | NULL | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
| 6 | G | Sweety | violet | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+
6 rows in set (0.14 sec)
Bye
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment