Skip to content

Instantly share code, notes, and snippets.

@mshakhomirov
Created April 7, 2023 13:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mshakhomirov/9becb1310447b552e5d4727e37456f00 to your computer and use it in GitHub Desktop.
Save mshakhomirov/9becb1310447b552e5d4727e37456f00 to your computer and use it in GitHub Desktop.
CREATE TABLE IF NOT EXISTS mydatabase.user_transactions (
dt date,
user_id int,
total_cost_usd float,
registration_date string
)
PARTITIONED BY (dt)
LOCATION 's3://datalake.staging.aws/data/myschema/optimized-data-iceberg-parquet/'
TBLPROPERTIES (
'table_type'='ICEBERG',
'format'='parquet',
'write_target_data_file_size_bytes'='536870912',
'optimize_rewrite_delete_file_threshold'='10'
)
;
MERGE INTO mydatabase.user_transactions as ut
USING (
SELECT
date(dt) dt
, user_id
, sum(total_cost) total_cost_usd
, registration_date
FROM mydatabase.transactions
LEFT JOIN mydatabase.users
ON users.id = transactions.user_id
GROUP BY
dt
, user_id
, registration_date
) as ut2
ON (ut.dt = ut2.dt and ut.user_id = ut2.user_id)
WHEN MATCHED
THEN UPDATE
SET total_cost_usd = ut2.total_cost_usd, registration_date = ut2.registration_date
WHEN NOT MATCHED
THEN INSERT (
dt
,user_id
,total_cost_usd
,registration_date
)
VALUES (
ut2.dt
,ut2.user_id
,ut2.total_cost_usd
,ut2.registration_date
)
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment