Last active
March 26, 2020 02:00
-
-
Save raven-rock/ff715978ae75fdf67e42f764ed1566c3 to your computer and use it in GitHub Desktop.
Example of how to update a Type 2 slowly changing dimension table from staged data using MySQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
In this example, | |
- rowid is simply a rowid for the scd. Perhaps to avoid confusion we actually always name this "rowid". | |
- widget_code is the durable natural key (could be a composite set of columns) | |
- widget_name is the change-tracked attribute (could be set of columns) | |
- widget_color is another the change-tracked attribute (could be set of columns) | |
*/ | |
set @infinity = '9999-12-31 23:59:59'; | |
-- Set up scd | |
create temporary table scd_widget | |
( | |
rowid int unsigned auto_increment | |
, widget_code varchar(20) | |
, widget_name varchar(20) | |
, widget_color varchar(20) | |
, is_current bool not null default true | |
, valid_from datetime | |
, valid_to datetime default '9999-12-31 23:59:59' | |
, primary key (rowid) | |
, index ( | |
widget_code | |
, widget_name | |
, widget_color | |
, valid_from | |
, valid_to | |
) | |
) | |
collate latin1_swedish_ci | |
; | |
insert into scd_widget | |
( | |
widget_code | |
, widget_name | |
, widget_color | |
, valid_from | |
) | |
values | |
('A', 'Simply', 'red', now() - interval 2 day) | |
, ('B', 'Plexy', 'green', now() - interval 2 day) | |
, ('C', 'Basicy', 'blue', now() - interval 2 day) | |
, ('E', 'Lamey', 'sky', now() - interval 2 day) | |
, ('F', 'Awesomey', null, now() - interval 2 day) | |
, ('G', 'Sweety', 'violet', now() - interval 2 day) | |
; | |
-- select * from scd_widget; | |
-- Set up stage | |
create temporary table stage_widget | |
( | |
widget_code varchar(20) | |
, widget_name varchar(20) | |
, widget_color varchar(20) | |
, primary key (widget_code) | |
) | |
collate latin1_swedish_ci | |
; | |
insert into stage_widget | |
( | |
widget_code | |
, widget_name | |
, widget_color | |
) | |
values | |
-- ('A', 'red') -- deleted | |
('B', 'Plexy', 'orange') -- changed | |
, ('C', 'Basicy', 'blue') | |
, ('D', 'Loosy', 'indigo') -- added | |
, ('E', 'NoSoLamey', 'sky') | |
, ('F', 'Awesomey', 'pink') -- set color from null to something | |
, ('G', 'Sweety', null) -- set color from something to null | |
; | |
-- select * from stage_widget; | |
-- Changes (excl new) | |
set @tx_datetime = now(); -- tranaction datetime stamp for setting valid_from/to to same moment in time | |
create temporary table stage_upserts | |
( | |
rowid int unsigned default null | |
, widget_code varchar(20) | |
, widget_name varchar(20) | |
, widget_color varchar(20) | |
, is_current bool not null default true | |
, valid_from datetime | |
, valid_to datetime default '9999-12-31 23:59:59' | |
, scd_action varchar(25) | |
) | |
collate latin1_swedish_ci | |
; | |
-- close deleted records and old records that have a new modified record | |
insert into stage_upserts | |
select | |
d.rowid | |
, d.widget_code | |
, d.widget_name | |
, d.widget_color | |
, false as is_current | |
, d.valid_from | |
, @tx_datetime as valid_to | |
-- , d.widget_color as x_old_widget_color | |
, case | |
when s.widget_code is null then 'close (deleted)' | |
else 'close (modified)' | |
end as scd_action | |
from scd_widget d | |
left join stage_widget s on s.widget_code = d.widget_code | |
where | |
( | |
s.widget_code is null -- means it has been "deleted" | |
or | |
( -- means it has been modified | |
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'') | |
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'') | |
) | |
) | |
; | |
-- open new and modified records | |
insert into stage_upserts | |
select | |
null as rowid | |
, s.widget_code | |
, s.widget_name | |
, s.widget_color | |
, true as is_current | |
, @tx_datetime as valid_from | |
, @infinity as valid_to | |
, case | |
when d.widget_code is null then 'open (new)' | |
else 'open (modified)' | |
end as scd_action | |
from stage_widget s | |
left join scd_widget d on d.widget_code = s.widget_code and d.is_current = true | |
where | |
( | |
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'') | |
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'') | |
) | |
; | |
-- all the changes to apply | |
select * from stage_upserts ; | |
-- initial state | |
select * from scd_widget; | |
-- apply all changes via upsert | |
insert into scd_widget | |
( | |
rowid | |
, widget_code | |
, widget_name | |
, widget_color | |
, is_current | |
, valid_from | |
, valid_to | |
) | |
select | |
rowid | |
, widget_code | |
, widget_name | |
, widget_color | |
, is_current | |
, valid_from | |
, valid_to | |
from stage_upserts | |
on duplicate key update | |
widget_name = values(widget_name) | |
, widget_color = values(widget_color) | |
, is_current = values(is_current) | |
, valid_from = values(valid_from) | |
, valid_to = values(valid_to) | |
; | |
-- final state | |
select * from scd_widget order by widget_code; | |
-- is_current=true dimension | |
select * from scd_widget where is_current = true order by widget_code; | |
-- yesterday view of dimension | |
set @yesterday = now() - interval 1 day; | |
select @yesterday; | |
select * from scd_widget where @yesterday >= valid_from and @yesterday < valid_to order by widget_code; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-------------- | |
set @infinity = '9999-12-31 23:59:59' | |
-------------- | |
Query OK, 0 rows affected (0.16 sec) | |
-------------- | |
create temporary table scd_widget | |
( | |
rowid int unsigned auto_increment | |
, widget_code varchar(20) | |
, widget_name varchar(20) | |
, widget_color varchar(20) | |
, is_current bool not null default true | |
, valid_from datetime | |
, valid_to datetime default '9999-12-31 23:59:59' | |
, primary key (rowid) | |
, index ( | |
widget_code | |
, widget_name | |
, widget_color | |
, valid_from | |
, valid_to | |
) | |
) | |
collate latin1_swedish_ci | |
-------------- | |
Query OK, 0 rows affected (0.14 sec) | |
-------------- | |
insert into scd_widget | |
( | |
widget_code | |
, widget_name | |
, widget_color | |
, valid_from | |
) | |
values | |
('A', 'Simply', 'red', now() - interval 2 day) | |
, ('B', 'Plexy', 'green', now() - interval 2 day) | |
, ('C', 'Basicy', 'blue', now() - interval 2 day) | |
, ('E', 'Lamey', 'sky', now() - interval 2 day) | |
, ('F', 'Awesomey', null, now() - interval 2 day) | |
, ('G', 'Sweety', 'violet', now() - interval 2 day) | |
-------------- | |
Query OK, 6 rows affected (0.15 sec) | |
Records: 6 Duplicates: 0 Warnings: 0 | |
-------------- | |
create temporary table stage_widget | |
( | |
widget_code varchar(20) | |
, widget_name varchar(20) | |
, widget_color varchar(20) | |
, primary key (widget_code) | |
) | |
collate latin1_swedish_ci | |
-------------- | |
Query OK, 0 rows affected (0.14 sec) | |
-------------- | |
insert into stage_widget | |
( | |
widget_code | |
, widget_name | |
, widget_color | |
) | |
values | |
('B', 'Plexy', 'orange') | |
, ('C', 'Basicy', 'blue') | |
, ('D', 'Loosy', 'indigo') | |
, ('E', 'NoSoLamey', 'sky') | |
, ('F', 'Awesomey', 'pink') | |
, ('G', 'Sweety', null) | |
-------------- | |
Query OK, 6 rows affected (0.14 sec) | |
Records: 6 Duplicates: 0 Warnings: 0 | |
-------------- | |
set @tx_datetime = now() | |
-------------- | |
Query OK, 0 rows affected (0.14 sec) | |
-------------- | |
create temporary table stage_upserts | |
( | |
rowid int unsigned default null | |
, widget_code varchar(20) | |
, widget_name varchar(20) | |
, widget_color varchar(20) | |
, is_current bool not null default true | |
, valid_from datetime | |
, valid_to datetime default '9999-12-31 23:59:59' | |
, scd_action varchar(25) | |
) | |
collate latin1_swedish_ci | |
-------------- | |
Query OK, 0 rows affected (0.15 sec) | |
-------------- | |
insert into stage_upserts | |
select | |
d.rowid | |
, d.widget_code | |
, d.widget_name | |
, d.widget_color | |
, false as is_current | |
, d.valid_from | |
, @tx_datetime as valid_to | |
, case | |
when s.widget_code is null then 'close (deleted)' | |
else 'close (modified)' | |
end as scd_action | |
from scd_widget d | |
left join stage_widget s on s.widget_code = d.widget_code | |
where | |
( | |
s.widget_code is null | |
or | |
( | |
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'') | |
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'') | |
) | |
) | |
-------------- | |
Query OK, 5 rows affected (0.14 sec) | |
Records: 5 Duplicates: 0 Warnings: 0 | |
-------------- | |
insert into stage_upserts | |
select | |
null as rowid | |
, s.widget_code | |
, s.widget_name | |
, s.widget_color | |
, true as is_current | |
, @tx_datetime as valid_from | |
, @infinity as valid_to | |
, case | |
when d.widget_code is null then 'open (new)' | |
else 'open (modified)' | |
end as scd_action | |
from stage_widget s | |
left join scd_widget d on d.widget_code = s.widget_code and d.is_current = true | |
where | |
( | |
coalesce(s.widget_name,'') <> coalesce(d.widget_name,'') | |
or coalesce(s.widget_color,'') <> coalesce(d.widget_color,'') | |
) | |
-------------- | |
Query OK, 5 rows affected (0.15 sec) | |
Records: 5 Duplicates: 0 Warnings: 0 | |
-------------- | |
select * from stage_upserts | |
-------------- | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+------------------+ | |
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to | scd_action | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+------------------+ | |
| 1 | A | Simply | red | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (deleted) | | |
| 2 | B | Plexy | green | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) | | |
| 4 | E | Lamey | sky | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) | | |
| 5 | F | Awesomey | NULL | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) | | |
| 6 | G | Sweety | violet | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | close (modified) | | |
| NULL | B | Plexy | orange | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) | | |
| NULL | E | NoSoLamey | sky | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) | | |
| NULL | F | Awesomey | pink | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) | | |
| NULL | G | Sweety | NULL | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (modified) | | |
| NULL | D | Loosy | indigo | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | open (new) | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+------------------+ | |
10 rows in set (0.14 sec) | |
-------------- | |
select * from scd_widget | |
-------------- | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| 1 | A | Simply | red | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 2 | B | Plexy | green | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 4 | E | Lamey | sky | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 5 | F | Awesomey | NULL | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 6 | G | Sweety | violet | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
6 rows in set (0.15 sec) | |
-------------- | |
insert into scd_widget | |
( | |
rowid | |
, widget_code | |
, widget_name | |
, widget_color | |
, is_current | |
, valid_from | |
, valid_to | |
) | |
select | |
rowid | |
, widget_code | |
, widget_name | |
, widget_color | |
, is_current | |
, valid_from | |
, valid_to | |
from stage_upserts | |
on duplicate key update | |
widget_name = values(widget_name) | |
, widget_color = values(widget_color) | |
, is_current = values(is_current) | |
, valid_from = values(valid_from) | |
, valid_to = values(valid_to) | |
-------------- | |
Query OK, 15 rows affected (0.15 sec) | |
Records: 10 Duplicates: 5 Warnings: 0 | |
-------------- | |
select * from scd_widget order by widget_code | |
-------------- | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| 1 | A | Simply | red | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 2 | B | Plexy | green | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 7 | B | Plexy | orange | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 11 | D | Loosy | indigo | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 4 | E | Lamey | sky | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 8 | E | NoSoLamey | sky | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 5 | F | Awesomey | NULL | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 9 | F | Awesomey | pink | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 10 | G | Sweety | NULL | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 6 | G | Sweety | violet | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
11 rows in set (0.15 sec) | |
-------------- | |
select * from scd_widget where is_current = true order by widget_code | |
-------------- | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| 7 | B | Plexy | orange | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 11 | D | Loosy | indigo | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 8 | E | NoSoLamey | sky | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 9 | F | Awesomey | pink | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
| 10 | G | Sweety | NULL | 1 | 2020-03-25 18:57:55 | 9999-12-31 23:59:59 | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
6 rows in set (0.17 sec) | |
-------------- | |
set @yesterday = now() - interval 1 day | |
-------------- | |
Query OK, 0 rows affected (0.14 sec) | |
-------------- | |
select @yesterday | |
-------------- | |
+---------------------+ | |
| @yesterday | | |
+---------------------+ | |
| 2020-03-24 18:57:57 | | |
+---------------------+ | |
1 row in set (0.15 sec) | |
-------------- | |
select * from scd_widget where @yesterday >= valid_from and @yesterday < valid_to order by widget_code | |
-------------- | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| rowid | widget_code | widget_name | widget_color | is_current | valid_from | valid_to | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
| 1 | A | Simply | red | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 2 | B | Plexy | green | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 3 | C | Basicy | blue | 1 | 2020-03-23 18:57:55 | 9999-12-31 23:59:59 | | |
| 4 | E | Lamey | sky | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 5 | F | Awesomey | NULL | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
| 6 | G | Sweety | violet | 0 | 2020-03-23 18:57:55 | 2020-03-25 18:57:55 | | |
+-------+-------------+-------------+--------------+------------+---------------------+---------------------+ | |
6 rows in set (0.14 sec) | |
Bye |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment