Created
February 20, 2018 14:01
-
-
Save onderkalaci/e0eb5fceb87adb8167c394875b8f6c3f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- =================================================================== | |
| -- test top level window functions that are pushdownable | |
| -- =================================================================== | |
| -- a very simple window function with an aggregate and a window function | |
| -- distribution column is on the partition by clause | |
| SELECT | |
| user_id, COUNT(*) OVER (PARTITION BY user_id), rank() OVER (PARTITION BY user_id) | |
| FROM | |
| users_table | |
| ORDER BY | |
| 1 DESC, 2 DESC, 3 DESC | |
| LIMIT 5; | |
| -- a more complicated window clause, including an aggregate | |
| -- in both the window clause and the target entry | |
| SELECT | |
| user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2)) | |
| FROM | |
| users_table | |
| GROUP BY | |
| 1 | |
| ORDER BY | |
| 2 DESC NULLS LAST, 1 DESC; | |
| -- window clause operates on the results of a subquery | |
| SELECT | |
| user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2)) | |
| FROM ( | |
| SELECT | |
| DISTINCT us.user_id, us.value_2, value_1, random() as r1 | |
| FROM | |
| users_table as us, events_table | |
| WHERE | |
| us.user_id = events_table.user_id | |
| AND event_type IN (1,2) | |
| ORDER BY | |
| user_id, value_2 | |
| ) s | |
| GROUP BY | |
| 1, value_1 | |
| ORDER BY | |
| 2 DESC, 1; | |
| -- window function operates on the results of | |
| -- a join | |
| SELECT | |
| us.user_id, | |
| SUM(us.value_1) OVER (PARTITION BY us.user_id) | |
| FROM | |
| users_table us | |
| JOIN | |
| events_table ev | |
| ON (us.user_id = ev.user_id) | |
| GROUP BY | |
| 1, | |
| value_1 | |
| ORDER BY | |
| 1, | |
| 2 | |
| LIMIT 5; | |
| -- the same query, but this time join with an alias | |
| SELECT | |
| user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id) | |
| FROM | |
| (users_table_local us | |
| JOIN | |
| events_table_local ev | |
| USING (user_id ) ) j | |
| GROUP BY | |
| user_id, | |
| value_1 | |
| ORDER BY | |
| 3 DESC, 2 DESC, 1 DESC | |
| LIMIT 5; | |
| -- the following test is not related to window functions | |
| -- but uses some common code path, thus keeping it here | |
| -- for now | |
| WITH users_events AS | |
| ( | |
| SELECT | |
| user_id | |
| FROM | |
| users_table | |
| ) | |
| SELECT | |
| uid, | |
| event_type, | |
| value_2, | |
| value_3 | |
| FROM ( | |
| (SELECT | |
| user_id as uid | |
| FROM | |
| users_events | |
| ) users | |
| JOIN | |
| events_table | |
| ON | |
| users.uid = events_table.event_type | |
| ) a | |
| ORDER BY | |
| 1,2,3,4 | |
| LIMIT 5; | |
| -- querying views that have window functions should be ok | |
| CREATE VIEW window_view AS | |
| SELECT | |
| DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1) | |
| FROM | |
| users_table | |
| GROUP BY | |
| user_id, value_1 | |
| HAVING count(*) > 1; | |
| -- Window function in View works | |
| SELECT * | |
| FROM | |
| window_view | |
| ORDER BY | |
| 2 DESC, 1 | |
| LIMIT | |
| 10; | |
| -- the other way around also should work fine | |
| -- query a view using window functions | |
| CREATE VIEW users_view AS | |
| SELECT * FROM users_table; | |
| SELECT | |
| DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1) | |
| FROM | |
| users_view | |
| GROUP BY | |
| user_id, value_1 | |
| HAVING count(*) > 4; | |
| DROP VIEW users_view, window_view; | |
| -- window function uses columns from two different tables | |
| SELECT | |
| DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk | |
| FROM | |
| events_table, users_table | |
| WHERE | |
| users_table.user_id = events_table.user_id | |
| WINDOW my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC) | |
| ORDER BY | |
| rnk DESC, 1 DESC | |
| LIMIT 10; | |
| -- the same query with reference tables on the partition by doesn't work | |
| SELECT | |
| DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk | |
| FROM | |
| events_table_ref as events_table, users_table | |
| WHERE | |
| users_table.user_id = events_table.user_id | |
| WINDOW my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC) | |
| ORDER BY | |
| rnk DESC, 1 DESC | |
| LIMIT 10; | |
| -- ORDER BY in the window function is an aggragate | |
| SELECT | |
| user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2 | |
| FROM | |
| events_table | |
| GROUP BY | |
| user_id, date_trunc('day', time) | |
| WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC) | |
| ORDER BY | |
| 3 DESC, 2 DESC, 1 DESC; | |
| -- lets push the limits of writing complex expressions aling with the window functions | |
| SELECT | |
| COUNT(*) OVER (PARTITION BY user_id, user_id + 1), rank() OVER (PARTITION BY user_id) as cnt1, | |
| COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2, | |
| date_trunc('min', lag(time) OVER (PARTITION BY user_id)) as datee, | |
| rank() OVER my_win as rnnk, | |
| avg(case | |
| when user_id > 4 | |
| then value_1 | |
| else value_2 | |
| end) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count, | |
| sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2 | |
| FROM | |
| users_table | |
| WINDOW my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC), | |
| my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC), | |
| my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)), | |
| my_win_4 AS (my_win_3 ORDER BY value_2, value_3) | |
| ORDER BY | |
| cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC | |
| LIMIT 5; | |
| -- some tests with GROUP BY along with PARTITION BY | |
| SELECT | |
| user_id, | |
| rank() OVER my_win as my_rank, | |
| avg(avg(event_type)) OVER my_win_2 as avg, | |
| max(time) as mx_time | |
| FROM | |
| events_table | |
| GROUP BY | |
| user_id, value_2 | |
| WINDOW | |
| my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC), | |
| my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC) | |
| ORDER BY avg DESC, | |
| mx_time DESC, my_rank DESC, user_id DESC; | |
| -- test for range and rows mode and different window functions | |
| -- mostly to make sure that deparsing works fine | |
| SELECT | |
| user_id, rank() OVER (PARTITION BY user_id ROWS BETWEEN | |
| UNBOUNDED PRECEDING AND CURRENT ROW), | |
| dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN | |
| UNBOUNDED PRECEDING AND CURRENT ROW), | |
| CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN | |
| UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), | |
| PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN | |
| UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) | |
| FROM | |
| users_table | |
| GROUP BY | |
| 1 | |
| ORDER BY | |
| 4 DESC,3 DESC,2 DESC ,1 DESC; | |
| -- some tests with GROUP BY, HAVING and LIMIT | |
| SELECT | |
| user_id, sum(event_type) OVER my_win , event_type | |
| FROM | |
| events_table | |
| GROUP BY | |
| user_id, event_type | |
| HAVING count(*) > 2 | |
| WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC) | |
| ORDER BY | |
| 2 DESC, 3 DESC, 1 DESC | |
| LIMIT | |
| 5; | |
| -- Group by has more columns than partition by | |
| -- !!!!!!!!!!!!!!!!WRONG RESULTS | |
| SELECT | |
| DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id) | |
| FROM | |
| users_table | |
| GROUP BY | |
| user_id, value_1, value_2 | |
| HAVING count(*) > 2 | |
| ORDER BY | |
| 2 DESC, 1 | |
| LIMIT | |
| 10; | |
| -- TODO:: ADD SOME TEST WITH DISTINCT ON as well | |
| -- not a meaningful query, with interesting syntax | |
| SELECT | |
| user_id, | |
| AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)), | |
| AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1)) | |
| FROM | |
| users_table | |
| GROUP BY | |
| 1 | |
| ORDER BY | |
| 3 DESC, 2 DESC, 1 DESC; | |
| -- we've fixed a bug related to joins w/wout alias | |
| -- while implementing top window functions | |
| -- thus adding some tests related to that (i.e., next 3 tests) | |
| WITH users_events AS | |
| ( | |
| SELECT | |
| user_id | |
| FROM | |
| users_table | |
| ) | |
| SELECT | |
| uid, | |
| event_type, | |
| value_2, | |
| value_3 | |
| FROM ( | |
| (SELECT | |
| user_id as uid | |
| FROM | |
| users_events | |
| ) users | |
| JOIN | |
| events_table | |
| ON | |
| users.uid = events_table.event_type | |
| ) a | |
| ORDER BY | |
| 1,2,3,4 | |
| LIMIT 5; | |
| -- the following queries are almost the same, | |
| -- the only difference is the final GROUP BY | |
| SELECT a.user_id, avg(b.value_2) as subquery_avg | |
| FROM | |
| (SELECT | |
| user_id | |
| FROM | |
| users_table | |
| WHERE | |
| (value_1 > 2) | |
| GROUP BY | |
| user_id | |
| HAVING | |
| count(distinct value_1) > 2 | |
| ) as a | |
| LEFT JOIN | |
| (SELECT | |
| DISTINCT ON (value_2) value_2 , user_id, value_3 | |
| FROM | |
| users_table | |
| WHERE | |
| (value_1 > 3) | |
| ORDER BY | |
| 1,2,3 | |
| ) AS b | |
| USING (user_id) | |
| GROUP BY user_id; | |
| -- see the comment for the above query | |
| SELECT a.user_id, avg(b.value_2) as subquery_avg | |
| FROM | |
| (SELECT | |
| user_id | |
| FROM | |
| users_table | |
| WHERE | |
| (value_1 > 2) | |
| GROUP BY | |
| user_id | |
| HAVING | |
| count(distinct value_1) > 2 | |
| ) as a | |
| LEFT JOIN | |
| (SELECT | |
| DISTINCT ON (value_2) value_2 , user_id, value_3 | |
| FROM | |
| users_table | |
| WHERE | |
| (value_1 > 3) | |
| ORDER BY | |
| 1,2,3 | |
| ) AS b | |
| USING (user_id) | |
| GROUP BY a.user_id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment