Last active
March 10, 2017 08:44
-
-
Save onderkalaci/1be7cf338d6322eaf9bd5aa6a202982b to your computer and use it in GitHub Desktop.
Subquery pushdown -- example queries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET citus.shard_replication_factor = 2; | |
CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint); | |
SELECT create_distributed_table('raw_events_first', 'user_id'); | |
CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint); | |
SELECT create_distributed_table('raw_events_second', 'user_id'); | |
CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp); | |
SELECT create_distributed_table('agg_events', 'user_id');; | |
-- create the reference table as well | |
CREATE TABLE reference_table (user_id int); | |
SELECT create_reference_table('reference_table'); | |
CREATE TABLE raw_events_first_local (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint); | |
CREATE TABLE raw_events_second_local (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint); | |
CREATE TABLE agg_events_local (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp); | |
CREATE TABLE reference_table_local (user_id int); | |
insert into raw_events_first_local (select (random() * 1000)::int, now(), (random() * 10000)::int, (random() * 10000)::int,(random() * 10000)::int FROM generate_series(0,909999,1)); | |
insert into raw_events_second_local (select (random() * 1000)::int, now(), (random() * 10000)::int, (random() * 10000)::int,(random() * 10000)::int FROM generate_series(0,909999,1)); | |
insert into reference_table_local (select (random() * 1000)::int FROM generate_series(0,909999,1)); | |
COPY raw_events_first_local TO '/tmp/data_1.csv' WITH CSV; | |
COPY raw_events_second_local TO '/tmp/data_2.csv' WITH CSV; | |
COPY reference_table_local TO '/tmp/data_3.csv' WITH CSV; | |
COPY raw_events_first FROM '/tmp/data_1.csv' WITH CSV; | |
COPY raw_events_second FROM '/tmp/data_2.csv' WITH CSV; | |
COPY reference_table FROM '/tmp/data_3.csv' WITH CSV; | |
------------------------------------ | |
------------------------------------ | |
-- HEAP - Q1: Vanilla Funnel Query | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE heap_q1 AS | |
SELECT user_id, array_length(events, 1) | |
FROM ( | |
SELECT user_id, array_agg(event ORDER BY time) AS events | |
FROM ( | |
SELECT u.user_id, hstore('type'::text, e.value_1::text) AS event, e.time | |
FROM raw_events_first_local AS u, | |
raw_events_second_local AS e | |
WHERE u.user_id = e.user_id | |
AND u.user_id >= 100 | |
AND u.user_id <= 250 | |
AND e.value_1 IN (100, 101, 102) | |
) t | |
GROUP BY user_id | |
) q; | |
-- now, try the same with distributed tables on the agg table | |
TRUNCATE agg_events; | |
INSERT INTO agg_events (user_id, value_1_agg) | |
SELECT user_id, array_length(events, 1) | |
FROM ( | |
SELECT user_id, array_agg(event ORDER BY time) AS events | |
FROM ( | |
SELECT u.user_id, hstore('type'::text, e.value_1::text) AS event, e.time | |
FROM raw_events_first AS u, | |
raw_events_second AS e | |
WHERE u.user_id = e.user_id | |
AND u.user_id >= 100 | |
AND u.user_id <= 250 | |
AND e.value_1 IN (100, 101, 102) | |
) t | |
GROUP BY user_id | |
) q; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE heap_q1) | |
UNION ALL | |
(TABLE heap_q1 EXCEPT TABLE agg_events_temp) ; | |
------------------------------------ | |
------------------------------------ | |
-- Heap Q2: Funnel grouped by whether or not a user has done an event | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE heap_q2 AS | |
SELECT user_id, sum(array_length(events, 1)), length(hasdone_56367) | |
FROM ( | |
SELECT | |
t1.user_id, | |
array_agg(event ORDER BY time) AS events, | |
COALESCE(hasdone_56367, 'Has not done view_page_billing_page_paypal') AS hasdone_56367 | |
FROM ( | |
( | |
SELECT u.user_id, 'step=>1'::HSTORE AS event, e.time | |
FROM raw_events_first_local AS u, | |
raw_events_second_local AS e | |
WHERE u.user_id = e.user_id | |
AND u.user_id >= 100 | |
AND u.user_id <= 250 | |
AND e.value_1 IN (100, 101, 102) | |
) | |
UNION | |
( | |
SELECT u.user_id, 'step=>2'::HSTORE AS event, e.time | |
FROM raw_events_first_local AS u, | |
raw_events_second_local AS e | |
WHERE u.user_id = e.user_id | |
AND u.user_id >= 100 | |
AND u.user_id <= 250 | |
AND e.value_1 IN (103, 104, 105) | |
) | |
) t1 LEFT JOIN ( | |
SELECT DISTINCT user_id, | |
'Has done view_page_billing_page_paypal'::TEXT AS hasdone_56367 | |
FROM raw_events_second_local AS e | |
WHERE e.user_id >= 100 | |
AND e.user_id <= 250 | |
AND e.value_1 IN (106, 107, 108) | |
) t2 ON (t1.user_id = t2.user_id) | |
GROUP BY t1.user_id, hasdone_56367 | |
) t GROUP BY user_id, hasdone_56367; | |
TRUNCATE agg_events; | |
INSERT INTO agg_events (user_id, value_1_agg, value_2_agg ) | |
SELECT user_id, sum(array_length(events, 1)), length(hasdone_56367) | |
FROM ( | |
SELECT | |
t1.user_id, | |
array_agg(event ORDER BY time) AS events, | |
COALESCE(hasdone_56367, 'Has not done view_page_billing_page_paypal') AS hasdone_56367 | |
FROM ( | |
( | |
SELECT u.user_id, 'step=>1'::HSTORE AS event, e.time | |
FROM raw_events_first AS u, | |
raw_events_second AS e | |
WHERE u.user_id = e.user_id | |
AND u.user_id >= 100 | |
AND u.user_id <= 250 | |
AND e.value_1 IN (100, 101, 102) | |
) | |
UNION | |
( | |
SELECT u.user_id, 'step=>2'::HSTORE AS event, e.time | |
FROM raw_events_first AS u, | |
raw_events_second AS e | |
WHERE u.user_id = e.user_id | |
AND u.user_id >= 100 | |
AND u.user_id <= 250 | |
AND e.value_1 IN (103, 104, 105) | |
) | |
) t1 LEFT JOIN ( | |
SELECT DISTINCT user_id, | |
'Has done view_page_billing_page_paypal'::TEXT AS hasdone_56367 | |
FROM raw_events_second AS e | |
WHERE e.user_id >= 100 | |
AND e.user_id <= 250 | |
AND e.value_1 IN (106, 107, 108) | |
) t2 ON (t1.user_id = t2.user_id) | |
GROUP BY t1.user_id, hasdone_56367 | |
) t GROUP BY user_id, hasdone_56367; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE heap_q2) | |
UNION ALL | |
(TABLE heap_q2 EXCEPT TABLE agg_events_temp) ; | |
------------------------------------ | |
------------------------------------ | |
-- Heap Q3: Funnel, grouped by the number of times a user has done an event | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE heap_q3 AS | |
SELECT | |
user_id, | |
avg(array_length(events, 1)) AS event_average, | |
count_pay | |
FROM ( | |
SELECT | |
subquery_1.user_id, | |
array_agg(event ORDER BY time) AS events, | |
COALESCE(count_pay, 0) AS count_pay | |
FROM | |
( | |
(SELECT | |
raw_events_first.user_id, | |
'action=>1'AS event, | |
raw_events_second.time | |
FROM | |
raw_events_first_local as raw_events_first, | |
raw_events_second_local as raw_events_second | |
WHERE | |
raw_events_first.user_id = raw_events_second.user_id AND | |
raw_events_first.user_id >= 100 AND | |
raw_events_first.user_id <= 700 AND | |
raw_events_second.value_1 > 10 AND raw_events_second.value_1 < 12 | |
) | |
UNION | |
(SELECT | |
raw_events_first.user_id, | |
'action=>2'AS event, | |
raw_events_second.time | |
FROM | |
raw_events_first_local as raw_events_first, | |
raw_events_second_local as raw_events_second | |
WHERE | |
raw_events_first.user_id = raw_events_second.user_id AND | |
raw_events_first.user_id >= 100 AND | |
raw_events_first.user_id <= 700 AND | |
raw_events_second.value_1 > 12 AND raw_events_second.value_1 < 14 | |
) | |
) AS subquery_1 | |
LEFT JOIN | |
(SELECT | |
user_id, | |
COUNT(*) AS count_pay | |
FROM | |
raw_events_first_local as raw_events_first | |
WHERE | |
user_id >= 100 AND | |
user_id <= 700 AND | |
raw_events_first.value_1 > 15 AND raw_events_first.value_1 < 17 | |
GROUP BY | |
user_id | |
HAVING | |
COUNT(*) > 0) AS subquery_2 | |
ON | |
subquery_1.user_id = subquery_2.user_id | |
GROUP BY | |
subquery_1.user_id, | |
count_pay) AS subquery_top | |
WHERE | |
array_ndims(events) > 0 | |
GROUP BY | |
count_pay, user_id | |
ORDER BY | |
count_pay; | |
-- now, try the same with distributed tables on the agg table | |
TRUNCATE agg_events; | |
INSERT INTO agg_events (user_id, value_1_agg, value_2_agg) | |
SELECT | |
user_id, | |
avg(array_length(events, 1)) AS event_average, | |
count_pay | |
FROM ( | |
SELECT | |
subquery_1.user_id, | |
array_agg(event ORDER BY time) AS events, | |
COALESCE(count_pay, 0) AS count_pay | |
FROM | |
( | |
(SELECT | |
raw_events_first.user_id, | |
'action=>1'AS event, | |
raw_events_second.time | |
FROM | |
raw_events_first, | |
raw_events_second | |
WHERE | |
raw_events_first.user_id = raw_events_second.user_id AND | |
raw_events_first.user_id >= 100 AND | |
raw_events_first.user_id <= 700 AND | |
raw_events_second.value_1 > 10 AND raw_events_second.value_1 < 12 | |
) | |
UNION | |
(SELECT | |
raw_events_first.user_id, | |
'action=>2'AS event, | |
raw_events_second.time | |
FROM | |
raw_events_first, | |
raw_events_second | |
WHERE | |
raw_events_first.user_id = raw_events_second.user_id AND | |
raw_events_first.user_id >= 100 AND | |
raw_events_first.user_id <= 700 AND | |
raw_events_second.value_1 > 12 AND raw_events_second.value_1 < 14 | |
) | |
) AS subquery_1 | |
LEFT JOIN | |
(SELECT | |
user_id, | |
COUNT(*) AS count_pay | |
FROM | |
raw_events_first | |
WHERE | |
user_id >= 100 AND | |
user_id <= 700 AND | |
raw_events_first.value_1 > 15 AND raw_events_first.value_1 < 17 | |
GROUP BY | |
user_id | |
HAVING | |
COUNT(*) > 0) AS subquery_2 | |
ON | |
subquery_1.user_id = subquery_2.user_id | |
GROUP BY | |
subquery_1.user_id, | |
count_pay) AS subquery_top | |
WHERE | |
array_ndims(events) > 0 | |
GROUP BY | |
count_pay, user_id | |
ORDER BY | |
count_pay; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE heap_q3) | |
UNION ALL | |
(TABLE heap_q3 EXCEPT TABLE agg_events_temp) ; | |
------------------------------------ | |
------------------------------------ | |
-- Heap Q4: Most recently seen users’ events | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE heap_q4 AS | |
SELECT | |
user_id, | |
user_lastseen, | |
array_length(event_array, 1) | |
FROM ( | |
SELECT | |
user_id, | |
max(u.time) as user_lastseen, | |
array_agg(value_1 ORDER BY u.time) AS event_array | |
FROM ( | |
SELECT user_id, time | |
FROM raw_events_first_local | |
WHERE | |
user_id >= 100 AND | |
user_id <= 700 AND | |
raw_events_first_local.value_1 > 10 AND raw_events_first_local.value_1 < 12 | |
) u LEFT JOIN LATERAL ( | |
SELECT value_1, time | |
FROM raw_events_second_local | |
WHERE user_id = u.user_id AND | |
raw_events_second_local.value_1 > 10 AND raw_events_second_local.value_1 < 12 | |
) t ON true | |
GROUP BY user_id | |
) AS shard_union | |
ORDER BY user_lastseen DESC; | |
TRUNCATE agg_events; | |
INSERT INTO agg_events (user_id, agg_time, value_2_agg) | |
SELECT | |
user_id, | |
user_lastseen, | |
array_length(event_array, 1) | |
FROM ( | |
SELECT | |
user_id, | |
max(u.time) as user_lastseen, | |
array_agg(value_1 ORDER BY u.time) AS event_array | |
FROM ( | |
SELECT user_id, time | |
FROM raw_events_first | |
WHERE | |
user_id >= 100 AND | |
user_id <= 700 AND | |
raw_events_first.value_1 > 10 AND raw_events_first.value_1 < 12 | |
) u LEFT JOIN LATERAL ( | |
SELECT value_1, time | |
FROM raw_events_second | |
WHERE user_id = u.user_id AND | |
raw_events_second.value_1 > 10 AND raw_events_second.value_1 < 12 | |
) t ON true | |
GROUP BY user_id | |
) AS shard_union | |
ORDER BY user_lastseen DESC; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, agg_time, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE heap_q4) | |
UNION ALL | |
(TABLE heap_q4 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- IgnitionOne Q2: Count the number of distinct users who are in segment X and Y and Z | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE ignitionOneQ2 AS | |
SELECT DISTINCT user_id | |
FROM raw_events_first_local | |
WHERE user_id IN (SELECT user_id FROM raw_events_first_local WHERE value_1 >= 10 AND value_1 <= 20) | |
AND user_id IN (SELECT user_id FROM raw_events_first_local WHERE value_1 >= 30 AND value_1 <= 40) | |
AND user_id IN (SELECT user_id FROM raw_events_first_local WHERE value_1 >= 50 AND value_1 <= 60); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events (user_id) | |
SELECT DISTINCT user_id | |
FROM raw_events_first | |
WHERE user_id IN (SELECT user_id FROM raw_events_first WHERE value_1 >= 10 AND value_1 <= 20) | |
AND user_id IN (SELECT user_id FROM raw_events_first WHERE value_1 >= 30 AND value_1 <= 40) | |
AND user_id IN (SELECT user_id FROM raw_events_first WHERE value_1 >= 50 AND value_1 <= 60); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE ignitionOneQ2) | |
UNION ALL | |
(TABLE ignitionOneQ2 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- IgnitionOne Q3: Count the number of distinct users who are in at least two of X and Y and Z segments | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE ignitionOneQ3 AS | |
SELECT user_id | |
FROM raw_events_first_local | |
WHERE (value_1 = 10 | |
OR value_1 = 11 | |
OR value_1 = 12) | |
GROUP BY user_id | |
HAVING count(distinct value_1) >= 2; | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id) | |
SELECT user_id | |
FROM raw_events_first | |
WHERE (value_1 = 10 | |
OR value_1 = 11 | |
OR value_1 = 12) | |
GROUP BY user_id | |
HAVING count(distinct value_1) >= 2; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE ignitionOneQ3) | |
UNION ALL | |
(TABLE ignitionOneQ3 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- IgnitionOne Q4: Count the number of distinct users who are in segments X and Y, but not in segment Z | |
---- NOT SUPPORTED DUE TO NOT IN ------------------- | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE ignitionOneQ3 AS | |
SELECT user_id | |
FROM raw_events_first_local | |
WHERE (value_1 = 10 | |
OR value_1 = 11) | |
AND user_id NOT IN (select user_id from raw_events_first_local where value_1 = 12) | |
GROUP BY user_id | |
HAVING count(distinct value_1) = 2; | |
INSERT INTO agg_events (user_id) | |
SELECT user_id | |
FROM raw_events_first | |
WHERE (value_1 = 10 | |
OR value_1 = 11) | |
AND user_id NOT IN (select user_id from raw_events_first where value_1 = 12) | |
GROUP BY user_id | |
HAVING count(distinct value_1) = 2; | |
------------------------------------ | |
------------------------------------ | |
-- OMetria Q1: Find customers who have done X, and satisfy other customer specific criteria | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE OMetriaQ1 AS | |
SELECT user_id, value_2 FROM raw_events_first_local WHERE | |
value_1 = 100 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first_local.user_id); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_2_agg) | |
SELECT user_id, value_2 FROM raw_events_first WHERE | |
value_1 = 100 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first.user_id); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ1) | |
UNION ALL | |
(TABLE OMetriaQ1 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- OMetria Q2: Customers who haven’t done X, and satisfy other customer specific criteria | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE OMetriaQ2 AS | |
SELECT user_id, value_2 FROM raw_events_first_local WHERE | |
value_1 = 101 | |
AND value_2 >= 5 | |
AND NOT EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first_local.user_id); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_2_agg) | |
SELECT user_id, value_2 FROM raw_events_first WHERE | |
value_1 = 101 | |
AND value_2 >= 5 | |
AND NOT EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first.user_id); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ2) | |
UNION ALL | |
(TABLE OMetriaQ2 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- OMetria Q3: Customers who have done X and Y, and satisfy other customer specific criteria | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE OMetriaQ3 AS | |
SELECT user_id, value_2 FROM raw_events_first_local WHERE | |
value_1 = 100 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first_local.user_id) | |
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first_local.user_id); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_2_agg) | |
SELECT user_id, value_2 FROM raw_events_first WHERE | |
value_1 = 100 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first.user_id) | |
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first.user_id); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ3) | |
UNION ALL | |
(TABLE OMetriaQ3 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- OMetria Q4: Customers who have done X and haven’t done Y, and satisfy other customer specific criteria | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE OMetriaQ4 AS | |
SELECT user_id, value_2 FROM raw_events_first_local WHERE | |
value_1 = 101 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first_local.user_id) | |
AND NOT EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first_local.user_id); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_2_agg) | |
SELECT user_id, value_2 FROM raw_events_first WHERE | |
value_1 = 101 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first.user_id) | |
AND NOT EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first.user_id); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ4) | |
UNION ALL | |
(TABLE OMetriaQ4 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- OMetria Q5: Customers who have done X more than 10 times, and satisfy other customer specific criteria | |
------------------------------------ | |
------------------------------------ | |
CREATE temp TABLE ometriaq5 AS | |
SELECT user_id, | |
value_2 | |
FROM raw_events_first_local | |
WHERE value_1 > 100 | |
AND value_1 < 124 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id | |
FROM raw_events_second_local | |
WHERE value_1 > 100 | |
AND value_1 < 124 | |
AND value_3 > 100 | |
AND user_id = raw_events_first_local.user_id | |
GROUP BY user_id | |
HAVING Count(*) > 2); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_2_agg) | |
SELECT user_id, | |
value_2 | |
FROM raw_events_first | |
WHERE value_1 > 100 | |
AND value_1 < 124 | |
AND value_2 >= 5 | |
AND EXISTS (SELECT user_id | |
FROM raw_events_second | |
WHERE value_1 > 100 | |
AND value_1 < 124 | |
AND value_3 > 100 | |
AND user_id = raw_events_first.user_id | |
GROUP BY user_id | |
HAVING Count(*) > 2); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ5) | |
UNION ALL | |
(TABLE OMetriaQ5 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- Segment.com Q1: Find me all users who logged in more than once | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE segmentQ1 AS | |
SELECT user_id, value_1 from | |
( | |
SELECT user_id, value_1 From raw_events_first | |
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1 | |
) as a; | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_1_agg) | |
SELECT user_id, value_1 from | |
( | |
SELECT user_id, value_1 From raw_events_first | |
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1 | |
) as a; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE segmentQ1) | |
UNION ALL | |
(TABLE segmentQ1 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- Segment.com Q2: Find me all users for whom the referrer was ‘google’ and which pages they visited | |
-- NO SUBQUERIES -- WE ALREADY SUPPORT | |
------------------------------------ | |
------------------------------------ | |
-- trivial join query | |
------------------------------------ | |
------------------------------------ | |
-- Segment.com Q3: Find me all users who were referred by google and who viewed the nps survey | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE segmentQ3 AS | |
Select user_id | |
From raw_events_second_local | |
Where value_1 = 156 | |
And value_2 > 500 | |
And user_id in | |
(select user_id | |
From raw_events_first_local | |
Where value_1 = 156 | |
And value_2 > 250); | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id) | |
Select user_id | |
From raw_events_second | |
Where value_1 = 156 | |
And value_2 > 500 | |
And user_id in | |
(select user_id | |
From raw_events_first | |
Where value_1 = 156 | |
And value_2 > 250); | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE segmentQ3) | |
UNION ALL | |
(TABLE segmentQ3 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- Segment.com Q4: Which events did people who were referred to by google do (For the gy2d project) | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE segmentQ4 AS | |
SELECT user_id, value_1 FROM raw_events_first_local | |
WHERE user_id in (SELECT user_id from raw_events_second_local WHERE value_2 > 5000 and value_2 < 5005) | |
GROUP BY user_id, value_1; | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_1_agg) | |
SELECT user_id, value_1 FROM raw_events_first | |
WHERE user_id in (SELECT user_id from raw_events_second WHERE value_2 > 5000 and value_2 < 5005) | |
GROUP BY user_id, value_1; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE segmentQ4) | |
UNION ALL | |
(TABLE segmentQ4 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- Segment.com Q5: Find me all the users who logged in more than three times (across all projects) | |
------------------------------------ | |
------------------------------------ | |
-- a trivial query Citus already handles wihtout subqueries | |
------------------------------------ | |
------------------------------------ | |
-- Clarifai Q1: Find me all the users who logged in more than three times (across all projects) | |
------------------------------------ | |
------------------------------------ | |
CREATE TEMP TABLE clarifiaq1 AS | |
SELECT | |
raw_events_first_local.user_id, raw_events_first_local.value_1, prob | |
FROM | |
raw_events_first_local | |
JOIN | |
(SELECT | |
ma.user_id, | |
(GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob | |
FROM raw_events_first_local AS ma, raw_events_second_local as short_list | |
WHERE short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.value_1 < 50 | |
-- ORDER BY prob DESC | |
-- OFFSET 0 | |
-- LIMIT 50 | |
) temp ON raw_events_first_local.user_id = temp.user_id WHERE raw_events_first_local.value_1 < 50; | |
TRUNCATE agg_events; | |
INSERT INTO agg_events(user_id, value_1_agg, value_3_agg) | |
SELECT | |
raw_events_first.user_id, raw_events_first.value_1, prob | |
FROM | |
raw_events_first | |
JOIN | |
(SELECT | |
ma.user_id, | |
(GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob | |
FROM raw_events_first AS ma, raw_events_second as short_list | |
WHERE short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.value_1 < 50 | |
-- ORDER BY prob DESC | |
-- OFFSET 0 | |
-- LIMIT 50 | |
) temp ON raw_events_first.user_id = temp.user_id WHERE raw_events_first.value_1 < 50; | |
DROP TABLE IF EXISTS agg_events_temp; | |
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg, value_3_agg FROM agg_events; | |
(TABLE agg_events_temp EXCEPT TABLE clarifiaq1) | |
UNION ALL | |
(TABLE clarifiaq1 EXCEPT TABLE agg_events_temp); | |
------------------------------------ | |
------------------------------------ | |
-- CloudWP Q1: Find me all the users who logged in more than three times (across all projects) | |
------------------------------------ | |
------------------------------------ | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment