Skip to content

Instantly share code, notes, and snippets.

@drewbanin
Created October 8, 2016 16:25
Show Gist options
  • Save drewbanin/cbd91114a98e92a27111a95382660e18 to your computer and use it in GitHub Desktop.
Save drewbanin/cbd91114a98e92a27111a95382660e18 to your computer and use it in GitHub Desktop.
with current_data as (
select * from {{ source_table }}
),
archived_data as (
select * from {{ this }}
),
combined as (
select [columns, in, source, table],
{{ updated_at }} as dbt_updated_at,
{{ unique_key }} as dbt_pk
from current_data
union all
-- very important; don't include valid_from, valid_to fields
-- otherwise the union won't work!. We can't just have fake null
-- valid_from, valid_to fields b/c then we can't select combined.* below!
select [columns, in, source, table]
{{ updated_at }} as dbt_updated_at,
{{ unique_key }} as dbt_pk
from archived_data
),
merged as (
select
distinct
combined.*,
least(combined.dbt_updated_at, current_data.dbt_updated_at) as valid_from,
case when combined.dbt_updated_at = current_data.dbt_updated_at then null
else current_data.dbt_updated_at
end as valid_to
from current_data
left outer join combined
on combined.dbt_pk = current_data.dbt_pk
and current_data.dbt_updated_at >= combined.dbt_updated_at
),
with_id as (
select *,
row_number() over (partition by dbt_pk order by dbt_updated_at asc) as dbt_archive_id
from merged
)
select md5(dbt_pk || '|' || dbt_archive_id) as scd_id, *
from with_id
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment