Skip to content

Instantly share code, notes, and snippets.

@monimiller
Last active August 23, 2023 18:55
Show Gist options
  • Save monimiller/67209ce91137eeb67bd35cbcf8224ae6 to your computer and use it in GitHub Desktop.
Save monimiller/67209ce91137eeb67bd35cbcf8224ae6 to your computer and use it in GitHub Desktop.
CREATE TABLE
orders (
event_id INTEGER,
created_ts TIMESTAMP,
metric INTEGER,
year VARCHAR,
month VARCHAR,
day VARCHAR
);
-- create iceberg table partitioned by day on the created_ts column
CREATE TABLE
orders_iceberg (
event_id INTEGER,
created_ts TIMESTAMP(6),
metric INTEGER
)
WITH
(
TYPE = 'iceberg',
partitioning = ARRAY['day(created_ts)']
);
-- insert rows
INSERT INTO orders_iceberg VALUES (1,timestamp '2022-09-10 10:45:38.527000',5.5);
INSERT INTO orders_iceberg VALUES (1,timestamp '2022-09-11 03:12:23.522000',5.5);
INSERT INTO orders_iceberg VALUES (1,timestamp '2022-09-12 10:46:13.516000',5.5);
INSERT INTO orders_iceberg VALUES (1,timestamp '2022-09-13 04:34:05.577000',5.5);
INSERT INTO orders_iceberg VALUES (1,timestamp '2022-09-14 09:10:23.517000',5.5);
-- query the table only looking for certain days
SELECT * FROM orders_iceberg
WHERE created_ts BETWEEN date '2022-09-10' AND date '2022-09-12';
ALTER TABLE <table> EXECUTE optimize;
ALTER TABLE <table> EXECUTE optimize(file_size_threshold => '10MB');
ALTER TABLE <table> EXECUTE optimize WHERE "$file_modified_time" > current_date - interval '1' day;
INSERT INTO
customer_iceberg
VALUES
(
90000,
'Testing',
'33 Main',
3,
'303-867-5309',
323,
'MACHINERY',
'Testing Iceberg'
);
UPDATE customer_iceberg SET name = 'Tim Rogers' WHERE custkey = 2732;
DELETE FROM customer_iceberg WHERE custkey = 2732;
MERGE INTO s3lakehouse.blog.customer_base AS b
USING (SELECT * FROM s3lakehouse.blog.customer_land) AS l
ON (b.custkey = l.custkey)
WHEN NOT MATCHED
THEN INSERT (custkey, name, state, zip, cust_since,last_update_dt)
VALUES(l.custkey, l.name, l.state, l.zip, l.cust_since,l.last_update_dt);
MERGE INTO s3lakehouse.blog.customer_base AS b
USING s3lakehouse.blog.customer_land AS l
ON (b.custkey = l.custkey)
WHEN MATCHED and b.name != l.name
THEN UPDATE
SET name = l.name ,
state = l.state,
zip = l.zip,
cust_since = l.cust_since
WHEN NOT MATCHED
THEN INSERT (custkey, name, state, zip, cust_since,last_update_dt)
VALUES(l.custkey, l.name, l.state, l.zip, l.cust_since,l.last_update_dt);
MERGE INTO s3lakehouse.blog.customer_base AS b
USING
( SELECT null AS custkey_match, custkey, name, state, zip, cust_since, last_update_dt,'Y' AS active_ind,current_timestamp AS end_dt
FROM s3lakehouse.blog.customer_land
UNION ALL
SELECT
custkey AS custkey_match,custkey, name, state, zip, cust_since, last_update_dt,active_ind,end_dt
FROM s3lakehouse.blog.customer_base
WHERE custkey IN
(SELECT custkey FROM s3lakehouse.blog.customer_land WHERE active_ind = 'Y')
) AS scdChangeRows
ON (b.custkey = scdChangeRows.custkey and b.custkey = scdChangeRows.custkey_match)
WHEN MATCHED and b.active_ind = 'Y' THEN
UPDATE SET end_dt = current_timestamp,active_ind = 'N'
WHEN NOT MATCHED THEN
INSERT (custkey, name, state, zip, cust_since,last_update_dt,active_ind,end_dt)
VALUES(scdChangeRows.custkey, scdChangeRows.name, scdChangeRows.state, scdChangeRows.zip,
scdChangeRows.cust_since,scdChangeRows.last_update_dt,'Y',null);
ALTER TABLE customer_iceberg RENAME TO customer_iceberg_new;
ALTER TABLE customer_iceberg RENAME COLUMN address TO fulladdress;
ALTER TABLE customer_iceberg ADD COLUMN tier VARCHAR(1);
CREATE TABLE orders_iceberg
WITH (partitioning = ARRAY['month(orderdate)']) AS
SELECT * FROM tpch.sf1.orders;
ALTER TABLE orders_iceberg SET PROPERTIES partitioning = ARRAY['day(orderdate)'];
CREATE TABLE orders_iceberg_new
WITH (partitioning = ARRAY['day(orderdate)']) AS
SELECT * FROM orders_iceberg;
SELECT * FROM "customer_iceberg$snapshots";
SELECT custkey,name
FROM customer_iceberg FOR VERSION
AS OF 5043425904354141100 WHERE custkey = 2732;
SELECT custkey,name
FROM customer_iceberg FOR VERSION
AS OF 3117754680069542695 WHERE custkey = 2732;
SELECT custkey,name
FROM customer_iceberg FOR TIMESTAMP
AS OF TIMESTAMP '2022-09-18 07:18:09.002 America/New_York' where custkey = 2732;
CALL iceberg.system.rollback_to_snapshot('demo_tpch', 'customer_iceberg', 5043425904354141100);
SELECT custkey,name
FROM customer_iceberg
WHERE custkey = 2732;
SELECT * FROM customer_iceberg WHERE custkey = 111029;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment