Skip to content

Instantly share code, notes, and snippets.

@sdebruyn
Created August 11, 2023 14:35
Show Gist options
  • Save sdebruyn/a773560dab038830586447eeee630923 to your computer and use it in GitHub Desktop.
Save sdebruyn/a773560dab038830586447eeee630923 to your computer and use it in GitHub Desktop.
dbt incremental model with hard deletes
{{
config(
materialized='incremental',
incremental_strategy='merge',
unique_key='customer_id',
post_hook="delete from {{ this }} where _is_deleted = 1"
)
}}
with orders as (
select
order_id,
customer_id,
shipment_address,
payment_status,
last_updated_at
from {{ ref('stg_orders') }}
-- you could also move this more downwards as SQL engines should be smart enough to push-down the predicate
-- but exp shows that it's not always the case and it's better to filter early
{% if is_incremental() %}
where last_updated_at > (select max(last_updated_at) from {{ this }})
{% endif %}
),
order_lines as (
select
order_id,
product_id,
quantity,
price
from {{ ref('stg_order_line_items') }}
),
customers as (
select
customer_id,
first_name,
last_name,
last_updated_at
from {{ ref('stg_customers') }}
{% if is_incremental() %}
-- you could also move this more downwards as SQL engines should be smart enough to push-down the predicate
-- but exp shows that it's not always the case and it's better to filter early
where last_updated_at > (select max(last_updated_at) from {{ this }})
or customer_id in (
select distinct customer_id
from orders
)
{% endif %}
),
order_lines_total_price(
select
*,
quantity * price as total_price
from order_lines
),
order_values(
select
o.order_id,
o.customer_id,
o.shipment_address,
o.payment_status,
max(o.last_updated_at) as last_updated_at,
sum(total_price) as order_value
from order_lines_total_price ol
inner join orders o
on o.order_id = ol.order_id
group by 1, 2, 3, 4
),
customer_values(
select
c.customer_id,
c.first_name,
c.last_name,
max(v.last_updated_at) as o_last_updated_at,
sum(v.order_value) as customer_value
from order_values v
inner join customers c
on c.customer_id = v.customer_id
group by 1, 2, 3, 4
),
top_1000_customers as (
select
top 1000
customer_id,
first_name,
last_name,
customer_value,
greatest(o_last_updated_at, last_updated_at) as last_updated_at
from customer_values
order by customer_value desc
),
final as (
select
*,
0 as _is_deleted
from top_1000_customers
union all
select
customer_id,
first_name,
last_name,
customer_value,
last_updated_at,
1 as _is_deleted
from {{ this }}
where customer_id not in (select customer_id from top_1000_customers)
)
select *
from final
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment