Skip to content

Instantly share code, notes, and snippets.

@timotewb
Created February 20, 2021 16:00
Show Gist options
  • Save timotewb/2ea8957837209ccfe4978465153c0b17 to your computer and use it in GitHub Desktop.
Save timotewb/2ea8957837209ccfe4978465153c0b17 to your computer and use it in GitHub Desktop.
%macro scd2(src_table_lib,src_table_name,unique_record_key,tgt_table_lib,tgt_table_name);
/*
src_table_lib: sas libname
src_table_name: sas table name
unique_record_key: unique key, must be a single column not a combination
tgt_table_lib: sas libname
tgt_table_name: sas table name
*/
%put &=src_table_lib.;
%put &=src_table_name.;
%put &=unique_record_key.;
%put &=tgt_table_lib.;
%put &=tgt_table_name.;
*--- assumes column lengths and types in table do not change;
proc sql noprint;
select distinct name into:column_names separated by ","
from dictionary.columns
where upcase(libname) = upcase("&src_table_lib." )
and upcase(memname) = upcase("&src_table_name.")
order by name;
quit;
%put &=column_names.;
*--- add unique key;
data _01_unique;
set &src_table_lib..&src_table_name.;
length db_unique_row $200.;
format db_unique_row $hex64. db_update_dt db_insert_dt datetime.;
db_unique_row = sha256(cats(&column_names.));
db_update_dt = '31DEC9999 0:0:0'dt;
db_insert_dt = datetime();
db_latest_row = 1;
run;
%if %sysfunc(exist(&tgt_table_lib..&tgt_table_name.)) %then %do;
%put NOTE: &tgt_table_lib..&tgt_table_name. exists, running SCD2.;
*--- if exists;
*---identify new or updated rows;
proc sql;
create table _02_new_or_updated as
select t1.*,
t1.db_unique_row as db_unique_row_new
from _01_unique t1
where t1.db_unique_row not in(
select distinct db_unique_row from &tgt_table_lib..&tgt_table_name. where db_latest_row = 1
) ;
quit;
*--- identify what happened to the row;
proc sort data=_02_new_or_updated; by &unique_record_key.; run;
proc sort data=&tgt_table_lib..&tgt_table_name. out=target_sorted(keep=&unique_record_key. db_unique_row db_unique_row_new); by &unique_record_key.; run;
data _03_new _03_change _03_error;
merge target_sorted (in=a)
_02_new_or_updated (in=b drop=db_unique_row); /* we keep the target tables key for amtching updates later */
by &unique_record_key.;
format db_action $10.;
if a and b then do;
db_action = 'change';
db_unique_row = db_unique_row_new;
output _03_change;
end;
else if b and not a then do;
db_action = 'new';
db_unique_row = db_unique_row_new;
output _03_new;
end;
else if a and not b then do;
delete;
end;
else do;
db_action = 'error';
output _03_error;
end;
drop db_unique_row_new;
run;
*--- identify deleted rows;
proc sql;
create table _04_deleted as
select t1.*
from &tgt_table_lib..&tgt_table_name. t1
where t1.&unique_record_key. not in(
select distinct &unique_record_key. from _01_unique
) and t1.db_latest_row = 1 and db_action ne 'delete';
quit;
data _04_deleted;
set _04_deleted;
db_action = 'delete';
run;
*--- update target table for changes;
proc sql;
update &tgt_table_lib..&tgt_table_name.
set db_update_dt = datetime(),
db_latest_row = 0
where &unique_record_key. in(
select distinct &unique_record_key. from _03_change
) and db_latest_row = 1;
quit;
*--- update target table for deleted;
proc sql;
update &tgt_table_lib..&tgt_table_name.
set db_update_dt = datetime(),
db_latest_row = 0
where &unique_record_key. in(
select distinct &unique_record_key. from _04_deleted
) and db_latest_row = 1;
quit;
*--- append change, delete and new rows;
data &tgt_table_lib..&tgt_table_name.;
set _03_new(in=a)
_03_change(in=b)
_04_deleted(in=c)
&tgt_table_lib..&tgt_table_name.(in=d);
if a then db_insert_dt = datetime();
if b then db_insert_dt = datetime();
if c then db_insert_dt = datetime();
run;
%end;
%else %do;
%put NOTE: New table &tgt_table_lib..&tgt_table_name. loading to library.;
*--- if not exist;
*--- load to library;
data &tgt_table_lib..&tgt_table_name.;
set _01_unique;
db_action = 'new';
run;
%end;
%mend;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment