Skip to content

Instantly share code, notes, and snippets.

@onderkalaci
Created February 20, 2018 14:01
Show Gist options
  • Select an option

  • Save onderkalaci/e0eb5fceb87adb8167c394875b8f6c3f to your computer and use it in GitHub Desktop.

Select an option

Save onderkalaci/e0eb5fceb87adb8167c394875b8f6c3f to your computer and use it in GitHub Desktop.
-- ===================================================================
-- test top level window functions that are pushdownable
-- ===================================================================
-- a very simple window function with an aggregate and a window function
-- distribution column is on the partition by clause
SELECT
user_id, COUNT(*) OVER (PARTITION BY user_id), rank() OVER (PARTITION BY user_id)
FROM
users_table
ORDER BY
1 DESC, 2 DESC, 3 DESC
LIMIT 5;
-- a more complicated window clause, including an aggregate
-- in both the window clause and the target entry
SELECT
user_id, avg(avg(value_3)) OVER (PARTITION BY user_id, MIN(value_2))
FROM
users_table
GROUP BY
1
ORDER BY
2 DESC NULLS LAST, 1 DESC;
-- window clause operates on the results of a subquery
SELECT
user_id, max(value_1) OVER (PARTITION BY user_id, MIN(value_2))
FROM (
SELECT
DISTINCT us.user_id, us.value_2, value_1, random() as r1
FROM
users_table as us, events_table
WHERE
us.user_id = events_table.user_id
AND event_type IN (1,2)
ORDER BY
user_id, value_2
) s
GROUP BY
1, value_1
ORDER BY
2 DESC, 1;
-- window function operates on the results of
-- a join
SELECT
us.user_id,
SUM(us.value_1) OVER (PARTITION BY us.user_id)
FROM
users_table us
JOIN
events_table ev
ON (us.user_id = ev.user_id)
GROUP BY
1,
value_1
ORDER BY
1,
2
LIMIT 5;
-- the same query, but this time join with an alias
SELECT
user_id, value_1, SUM(j.value_1) OVER (PARTITION BY j.user_id)
FROM
(users_table_local us
JOIN
events_table_local ev
USING (user_id ) ) j
GROUP BY
user_id,
value_1
ORDER BY
3 DESC, 2 DESC, 1 DESC
LIMIT 5;
-- the following test is not related to window functions
-- but uses some common code path, thus keeping it here
-- for now
WITH users_events AS
(
SELECT
user_id
FROM
users_table
)
SELECT
uid,
event_type,
value_2,
value_3
FROM (
(SELECT
user_id as uid
FROM
users_events
) users
JOIN
events_table
ON
users.uid = events_table.event_type
) a
ORDER BY
1,2,3,4
LIMIT 5;
-- querying views that have window functions should be ok
CREATE VIEW window_view AS
SELECT
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
FROM
users_table
GROUP BY
user_id, value_1
HAVING count(*) > 1;
-- Window function in View works
SELECT *
FROM
window_view
ORDER BY
2 DESC, 1
LIMIT
10;
-- the other way around also should work fine
-- query a view using window functions
CREATE VIEW users_view AS
SELECT * FROM users_table;
SELECT
DISTINCT user_id, rank() OVER (PARTITION BY user_id ORDER BY value_1)
FROM
users_view
GROUP BY
user_id, value_1
HAVING count(*) > 4;
DROP VIEW users_view, window_view;
-- window function uses columns from two different tables
SELECT
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
events_table, users_table
WHERE
users_table.user_id = events_table.user_id
WINDOW my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC)
ORDER BY
rnk DESC, 1 DESC
LIMIT 10;
-- the same query with reference tables on the partition by doesn't work
SELECT
DISTINCT ON (events_table.user_id, rnk) events_table.user_id, rank() OVER my_win AS rnk
FROM
events_table_ref as events_table, users_table
WHERE
users_table.user_id = events_table.user_id
WINDOW my_win AS (PARTITION BY events_table.user_id, users_table.value_1 ORDER BY events_table.time DESC)
ORDER BY
rnk DESC, 1 DESC
LIMIT 10;
-- ORDER BY in the window function is an aggragate
SELECT
user_id, rank() OVER my_win as rnk, avg(value_2) as avg_val_2
FROM
events_table
GROUP BY
user_id, date_trunc('day', time)
WINDOW my_win AS (PARTITION BY user_id ORDER BY avg(event_type) DESC)
ORDER BY
3 DESC, 2 DESC, 1 DESC;
-- lets push the limits of writing complex expressions aling with the window functions
SELECT
COUNT(*) OVER (PARTITION BY user_id, user_id + 1), rank() OVER (PARTITION BY user_id) as cnt1,
COUNT(*) OVER (PARTITION BY user_id, abs(value_1 - value_2)) as cnt2,
date_trunc('min', lag(time) OVER (PARTITION BY user_id)) as datee,
rank() OVER my_win as rnnk,
avg(case
when user_id > 4
then value_1
else value_2
end) FILTER (WHERE user_id > 2) OVER my_win_2 as filtered_count,
sum(user_id * (5.0 / (value_1 + value_2 + 0.1)) * value_3) FILTER (WHERE value_1::text LIKE '%1%') OVER my_win_4 as cnt_with_filter_2
FROM
users_table
WINDOW my_win AS (PARTITION BY user_id, (value_1%3)::int ORDER BY time DESC),
my_win_2 AS (PARTITION BY user_id, (value_1)::int ORDER BY time DESC),
my_win_3 AS (PARTITION BY user_id, date_trunc('min', time)),
my_win_4 AS (my_win_3 ORDER BY value_2, value_3)
ORDER BY
cnt_with_filter_2 DESC NULLS LAST, filtered_count DESC NULLS LAST, datee DESC NULLS LAST, rnnk DESC, cnt2 DESC, cnt1 DESC, user_id DESC
LIMIT 5;
-- some tests with GROUP BY along with PARTITION BY
SELECT
user_id,
rank() OVER my_win as my_rank,
avg(avg(event_type)) OVER my_win_2 as avg,
max(time) as mx_time
FROM
events_table
GROUP BY
user_id, value_2
WINDOW
my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC),
my_win_2 AS (PARTITION BY user_id, avg(user_id) ORDER BY count(*) DESC)
ORDER BY avg DESC,
mx_time DESC, my_rank DESC, user_id DESC;
-- test for range and rows mode and different window functions
-- mostly to make sure that deparsing works fine
SELECT
user_id, rank() OVER (PARTITION BY user_id ROWS BETWEEN
UNBOUNDED PRECEDING AND CURRENT ROW),
dense_rank() OVER (PARTITION BY user_id RANGE BETWEEN
UNBOUNDED PRECEDING AND CURRENT ROW),
CUME_DIST() OVER (PARTITION BY user_id RANGE BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
PERCENT_RANK() OVER (PARTITION BY user_id ORDER BY avg(value_1) RANGE BETWEEN
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
FROM
users_table
GROUP BY
1
ORDER BY
4 DESC,3 DESC,2 DESC ,1 DESC;
-- some tests with GROUP BY, HAVING and LIMIT
SELECT
user_id, sum(event_type) OVER my_win , event_type
FROM
events_table
GROUP BY
user_id, event_type
HAVING count(*) > 2
WINDOW my_win AS (PARTITION BY user_id, max(event_type) ORDER BY count(*) DESC)
ORDER BY
2 DESC, 3 DESC, 1 DESC
LIMIT
5;
-- Group by has more columns than partition by
-- !!!!!!!!!!!!!!!!WRONG RESULTS
SELECT
DISTINCT user_id, SUM(value_2) OVER (PARTITION BY user_id)
FROM
users_table
GROUP BY
user_id, value_1, value_2
HAVING count(*) > 2
ORDER BY
2 DESC, 1
LIMIT
10;
-- TODO:: ADD SOME TEST WITH DISTINCT ON as well
-- not a meaningful query, with interesting syntax
SELECT
user_id,
AVG(avg(value_1)) OVER (PARTITION BY user_id, max(user_id), MIN(value_2)),
AVG(avg(user_id)) OVER (PARTITION BY user_id, min(user_id), AVG(value_1))
FROM
users_table
GROUP BY
1
ORDER BY
3 DESC, 2 DESC, 1 DESC;
-- we've fixed a bug related to joins w/wout alias
-- while implementing top window functions
-- thus adding some tests related to that (i.e., next 3 tests)
WITH users_events AS
(
SELECT
user_id
FROM
users_table
)
SELECT
uid,
event_type,
value_2,
value_3
FROM (
(SELECT
user_id as uid
FROM
users_events
) users
JOIN
events_table
ON
users.uid = events_table.event_type
) a
ORDER BY
1,2,3,4
LIMIT 5;
-- the following queries are almost the same,
-- the only difference is the final GROUP BY
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY user_id;
-- see the comment for the above query
SELECT a.user_id, avg(b.value_2) as subquery_avg
FROM
(SELECT
user_id
FROM
users_table
WHERE
(value_1 > 2)
GROUP BY
user_id
HAVING
count(distinct value_1) > 2
) as a
LEFT JOIN
(SELECT
DISTINCT ON (value_2) value_2 , user_id, value_3
FROM
users_table
WHERE
(value_1 > 3)
ORDER BY
1,2,3
) AS b
USING (user_id)
GROUP BY a.user_id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment