-
-
Save matbenyo/8b7780684694bbbbf90ee9aef1336702 to your computer and use it in GitHub Desktop.
Sample queries for the medium article about QUALIFY clause
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
Sample queries for the medium article about QUALIFY clause | |
==================================================================== | |
#Don't forget to comment out code blocks, and uncomment relevant ones. | |
*/ | |
#Generate sample data (TMNT) | |
CREATE TEMP TABLE pizza_events AS( | |
SELECT 'Leonardo' AS ninja, TIMESTAMP('2024-06-01 10:00:00') AS event_timestamp, 'item_viewed' AS action, 'Margherita' AS pizza_name UNION ALL | |
SELECT 'Michelangelo', TIMESTAMP('2024-06-01 10:05:00'), 'item_viewed', 'Pepperoni' UNION ALL | |
SELECT 'Leonardo', TIMESTAMP('2024-06-01 10:10:00'), 'item_viewed', 'Pepperoni' UNION ALL | |
SELECT 'Michelangelo', TIMESTAMP('2024-06-01 10:15:00'), 'item_viewed', 'Margherita' UNION ALL | |
SELECT 'Leonardo', TIMESTAMP('2024-06-01 10:20:00'), 'purchased', 'Margherita' UNION ALL | |
SELECT 'Michelangelo', TIMESTAMP('2024-06-01 10:25:00'), 'purchased', 'Margherita' | |
); | |
/* | |
Source table | |
+--------------+--------------------------------+-------------+------------+ | |
| ninja | event_timestamp | action | pizza_name | | |
+--------------+--------------------------------+-------------+------------+ | |
| Leonardo | 2024-06-01 10:00:00.000000 UTC | item_viewed | Margherita | | |
| Michelangelo | 2024-06-01 10:05:00.000000 UTC | item_viewed | Pepperoni | | |
| Leonardo | 2024-06-01 10:10:00.000000 UTC | item_viewed | Pepperoni | | |
| Michelangelo | 2024-06-01 10:15:00.000000 UTC | item_viewed | Margherita | | |
| Leonardo | 2024-06-01 10:20:00.000000 UTC | purchased | Margherita | | |
| Michelangelo | 2024-06-01 10:25:00.000000 UTC | purchased | Margherita | | |
+--------------+--------------------------------+-------------+------------+ | |
Expected result | |
+--------------+------------+--------------------------------+ | |
| ninja | pizza_name | first_timestamp | | |
+--------------+------------+--------------------------------+ | |
| Leonardo | Margherita | 2024-06-01 10:00:00.000000 UTC | | |
| Michelangelo | Pepperoni | 2024-06-01 10:05:00.000000 UTC | | |
+--------------+------------+--------------------------------+ | |
*/ | |
#Solution 1: Naive Approach | |
--Assign a sequential number to each pizza view event (using the ROW_NUMBER window function). | |
--Select the first row in a subquery (using WHERE). | |
WITH first_interaction AS ( | |
SELECT | |
ninja, | |
MIN(event_timestamp) AS first_timestamp | |
FROM | |
pizza_events | |
WHERE | |
action = 'item_viewed' | |
GROUP BY | |
ninja | |
) | |
SELECT | |
e.ninja, | |
e.pizza_name, | |
f.first_timestamp | |
FROM | |
pizza_events e | |
JOIN | |
first_interaction f | |
ON | |
e.ninja = f.ninja AND e.event_timestamp = f.first_timestamp | |
WHERE | |
e.action = 'item_viewed'; | |
#Solution 2: A Better Alternative | |
-- Assign a sequential number to each pizza view event (using the ROW_NUMBER window function). | |
-- Select the first row in a subquery (using WHERE). | |
WITH numbered_interactions AS ( | |
SELECT | |
ninja, | |
pizza_name, | |
event_timestamp, | |
ROW_NUMBER() OVER(PARTITION BY ninja ORDER BY event_timestamp) AS row_num | |
FROM | |
pizza_events | |
WHERE | |
action = 'item_viewed' | |
) | |
SELECT | |
ninja, | |
pizza_name, | |
event_timestamp | |
FROM | |
numbered_interactions | |
WHERE | |
row_num = 1; | |
#Solution 3: We're Almost There | |
-- Assign a sequential number to each pizza view event (using the ROW_NUMBER window function). | |
-- Filter to retain only the first row (using QUALIFY). | |
SELECT | |
ninja, | |
pizza_name, | |
event_timestamp, | |
ROW_NUMBER() OVER(PARTITION BY ninja ORDER BY event_timestamp) AS row_num | |
FROM | |
pizza_events | |
WHERE | |
action = 'item_viewed' | |
QUALIFY | |
row_num = 1; | |
#Solution 4: Here We Go | |
-- Similar to Solution 3, but the row number is used directly within the QUALIFY clause, avoiding its inclusion in the SELECT statement. | |
SELECT | |
ninja, | |
pizza_name, | |
event_timestamp | |
FROM | |
pizza_events | |
WHERE | |
action = 'item_viewed' | |
QUALIFY | |
ROW_NUMBER() OVER(PARTITION BY ninja ORDER BY event_timestamp) = 1; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment