Skip to content

Instantly share code, notes, and snippets.

@onderkalaci
Last active March 10, 2017 08:44
Show Gist options
  • Save onderkalaci/1be7cf338d6322eaf9bd5aa6a202982b to your computer and use it in GitHub Desktop.
Save onderkalaci/1be7cf338d6322eaf9bd5aa6a202982b to your computer and use it in GitHub Desktop.
Subquery pushdown -- example queries
SET citus.shard_replication_factor = 2;
CREATE TABLE raw_events_first (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
SELECT create_distributed_table('raw_events_first', 'user_id');
CREATE TABLE raw_events_second (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
SELECT create_distributed_table('raw_events_second', 'user_id');
CREATE TABLE agg_events (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
SELECT create_distributed_table('agg_events', 'user_id');;
-- create the reference table as well
CREATE TABLE reference_table (user_id int);
SELECT create_reference_table('reference_table');
CREATE TABLE raw_events_first_local (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
CREATE TABLE raw_events_second_local (user_id int, time timestamp, value_1 int, value_2 int, value_3 float, value_4 bigint);
CREATE TABLE agg_events_local (user_id int, value_1_agg int, value_2_agg int, value_3_agg float, value_4_agg bigint, agg_time timestamp);
CREATE TABLE reference_table_local (user_id int);
insert into raw_events_first_local (select (random() * 1000)::int, now(), (random() * 10000)::int, (random() * 10000)::int,(random() * 10000)::int FROM generate_series(0,909999,1));
insert into raw_events_second_local (select (random() * 1000)::int, now(), (random() * 10000)::int, (random() * 10000)::int,(random() * 10000)::int FROM generate_series(0,909999,1));
insert into reference_table_local (select (random() * 1000)::int FROM generate_series(0,909999,1));
COPY raw_events_first_local TO '/tmp/data_1.csv' WITH CSV;
COPY raw_events_second_local TO '/tmp/data_2.csv' WITH CSV;
COPY reference_table_local TO '/tmp/data_3.csv' WITH CSV;
COPY raw_events_first FROM '/tmp/data_1.csv' WITH CSV;
COPY raw_events_second FROM '/tmp/data_2.csv' WITH CSV;
COPY reference_table FROM '/tmp/data_3.csv' WITH CSV;
------------------------------------
------------------------------------
-- HEAP - Q1: Vanilla Funnel Query
------------------------------------
------------------------------------
CREATE TEMP TABLE heap_q1 AS
SELECT user_id, array_length(events, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events
FROM (
SELECT u.user_id, hstore('type'::text, e.value_1::text) AS event, e.time
FROM raw_events_first_local AS u,
raw_events_second_local AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 100
AND u.user_id <= 250
AND e.value_1 IN (100, 101, 102)
) t
GROUP BY user_id
) q;
-- now, try the same with distributed tables on the agg table
TRUNCATE agg_events;
INSERT INTO agg_events (user_id, value_1_agg)
SELECT user_id, array_length(events, 1)
FROM (
SELECT user_id, array_agg(event ORDER BY time) AS events
FROM (
SELECT u.user_id, hstore('type'::text, e.value_1::text) AS event, e.time
FROM raw_events_first AS u,
raw_events_second AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 100
AND u.user_id <= 250
AND e.value_1 IN (100, 101, 102)
) t
GROUP BY user_id
) q;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE heap_q1)
UNION ALL
(TABLE heap_q1 EXCEPT TABLE agg_events_temp) ;
------------------------------------
------------------------------------
-- Heap Q2: Funnel grouped by whether or not a user has done an event
------------------------------------
------------------------------------
CREATE TEMP TABLE heap_q2 AS
SELECT user_id, sum(array_length(events, 1)), length(hasdone_56367)
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events,
COALESCE(hasdone_56367, 'Has not done view_page_billing_page_paypal') AS hasdone_56367
FROM (
(
SELECT u.user_id, 'step=>1'::HSTORE AS event, e.time
FROM raw_events_first_local AS u,
raw_events_second_local AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 100
AND u.user_id <= 250
AND e.value_1 IN (100, 101, 102)
)
UNION
(
SELECT u.user_id, 'step=>2'::HSTORE AS event, e.time
FROM raw_events_first_local AS u,
raw_events_second_local AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 100
AND u.user_id <= 250
AND e.value_1 IN (103, 104, 105)
)
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done view_page_billing_page_paypal'::TEXT AS hasdone_56367
FROM raw_events_second_local AS e
WHERE e.user_id >= 100
AND e.user_id <= 250
AND e.value_1 IN (106, 107, 108)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_56367
) t GROUP BY user_id, hasdone_56367;
TRUNCATE agg_events;
INSERT INTO agg_events (user_id, value_1_agg, value_2_agg )
SELECT user_id, sum(array_length(events, 1)), length(hasdone_56367)
FROM (
SELECT
t1.user_id,
array_agg(event ORDER BY time) AS events,
COALESCE(hasdone_56367, 'Has not done view_page_billing_page_paypal') AS hasdone_56367
FROM (
(
SELECT u.user_id, 'step=>1'::HSTORE AS event, e.time
FROM raw_events_first AS u,
raw_events_second AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 100
AND u.user_id <= 250
AND e.value_1 IN (100, 101, 102)
)
UNION
(
SELECT u.user_id, 'step=>2'::HSTORE AS event, e.time
FROM raw_events_first AS u,
raw_events_second AS e
WHERE u.user_id = e.user_id
AND u.user_id >= 100
AND u.user_id <= 250
AND e.value_1 IN (103, 104, 105)
)
) t1 LEFT JOIN (
SELECT DISTINCT user_id,
'Has done view_page_billing_page_paypal'::TEXT AS hasdone_56367
FROM raw_events_second AS e
WHERE e.user_id >= 100
AND e.user_id <= 250
AND e.value_1 IN (106, 107, 108)
) t2 ON (t1.user_id = t2.user_id)
GROUP BY t1.user_id, hasdone_56367
) t GROUP BY user_id, hasdone_56367;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE heap_q2)
UNION ALL
(TABLE heap_q2 EXCEPT TABLE agg_events_temp) ;
------------------------------------
------------------------------------
-- Heap Q3: Funnel, grouped by the number of times a user has done an event
------------------------------------
------------------------------------
CREATE TEMP TABLE heap_q3 AS
SELECT
user_id,
avg(array_length(events, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events,
COALESCE(count_pay, 0) AS count_pay
FROM
(
(SELECT
raw_events_first.user_id,
'action=>1'AS event,
raw_events_second.time
FROM
raw_events_first_local as raw_events_first,
raw_events_second_local as raw_events_second
WHERE
raw_events_first.user_id = raw_events_second.user_id AND
raw_events_first.user_id >= 100 AND
raw_events_first.user_id <= 700 AND
raw_events_second.value_1 > 10 AND raw_events_second.value_1 < 12
)
UNION
(SELECT
raw_events_first.user_id,
'action=>2'AS event,
raw_events_second.time
FROM
raw_events_first_local as raw_events_first,
raw_events_second_local as raw_events_second
WHERE
raw_events_first.user_id = raw_events_second.user_id AND
raw_events_first.user_id >= 100 AND
raw_events_first.user_id <= 700 AND
raw_events_second.value_1 > 12 AND raw_events_second.value_1 < 14
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
raw_events_first_local as raw_events_first
WHERE
user_id >= 100 AND
user_id <= 700 AND
raw_events_first.value_1 > 15 AND raw_events_first.value_1 < 17
GROUP BY
user_id
HAVING
COUNT(*) > 0) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events) > 0
GROUP BY
count_pay, user_id
ORDER BY
count_pay;
-- now, try the same with distributed tables on the agg table
TRUNCATE agg_events;
INSERT INTO agg_events (user_id, value_1_agg, value_2_agg)
SELECT
user_id,
avg(array_length(events, 1)) AS event_average,
count_pay
FROM (
SELECT
subquery_1.user_id,
array_agg(event ORDER BY time) AS events,
COALESCE(count_pay, 0) AS count_pay
FROM
(
(SELECT
raw_events_first.user_id,
'action=>1'AS event,
raw_events_second.time
FROM
raw_events_first,
raw_events_second
WHERE
raw_events_first.user_id = raw_events_second.user_id AND
raw_events_first.user_id >= 100 AND
raw_events_first.user_id <= 700 AND
raw_events_second.value_1 > 10 AND raw_events_second.value_1 < 12
)
UNION
(SELECT
raw_events_first.user_id,
'action=>2'AS event,
raw_events_second.time
FROM
raw_events_first,
raw_events_second
WHERE
raw_events_first.user_id = raw_events_second.user_id AND
raw_events_first.user_id >= 100 AND
raw_events_first.user_id <= 700 AND
raw_events_second.value_1 > 12 AND raw_events_second.value_1 < 14
)
) AS subquery_1
LEFT JOIN
(SELECT
user_id,
COUNT(*) AS count_pay
FROM
raw_events_first
WHERE
user_id >= 100 AND
user_id <= 700 AND
raw_events_first.value_1 > 15 AND raw_events_first.value_1 < 17
GROUP BY
user_id
HAVING
COUNT(*) > 0) AS subquery_2
ON
subquery_1.user_id = subquery_2.user_id
GROUP BY
subquery_1.user_id,
count_pay) AS subquery_top
WHERE
array_ndims(events) > 0
GROUP BY
count_pay, user_id
ORDER BY
count_pay;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE heap_q3)
UNION ALL
(TABLE heap_q3 EXCEPT TABLE agg_events_temp) ;
------------------------------------
------------------------------------
-- Heap Q4: Most recently seen users’ events
------------------------------------
------------------------------------
CREATE TEMP TABLE heap_q4 AS
SELECT
user_id,
user_lastseen,
array_length(event_array, 1)
FROM (
SELECT
user_id,
max(u.time) as user_lastseen,
array_agg(value_1 ORDER BY u.time) AS event_array
FROM (
SELECT user_id, time
FROM raw_events_first_local
WHERE
user_id >= 100 AND
user_id <= 700 AND
raw_events_first_local.value_1 > 10 AND raw_events_first_local.value_1 < 12
) u LEFT JOIN LATERAL (
SELECT value_1, time
FROM raw_events_second_local
WHERE user_id = u.user_id AND
raw_events_second_local.value_1 > 10 AND raw_events_second_local.value_1 < 12
) t ON true
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
TRUNCATE agg_events;
INSERT INTO agg_events (user_id, agg_time, value_2_agg)
SELECT
user_id,
user_lastseen,
array_length(event_array, 1)
FROM (
SELECT
user_id,
max(u.time) as user_lastseen,
array_agg(value_1 ORDER BY u.time) AS event_array
FROM (
SELECT user_id, time
FROM raw_events_first
WHERE
user_id >= 100 AND
user_id <= 700 AND
raw_events_first.value_1 > 10 AND raw_events_first.value_1 < 12
) u LEFT JOIN LATERAL (
SELECT value_1, time
FROM raw_events_second
WHERE user_id = u.user_id AND
raw_events_second.value_1 > 10 AND raw_events_second.value_1 < 12
) t ON true
GROUP BY user_id
) AS shard_union
ORDER BY user_lastseen DESC;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, agg_time, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE heap_q4)
UNION ALL
(TABLE heap_q4 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- IgnitionOne Q2: Count the number of distinct users who are in segment X and Y and Z
------------------------------------
------------------------------------
CREATE TEMP TABLE ignitionOneQ2 AS
SELECT DISTINCT user_id
FROM raw_events_first_local
WHERE user_id IN (SELECT user_id FROM raw_events_first_local WHERE value_1 >= 10 AND value_1 <= 20)
AND user_id IN (SELECT user_id FROM raw_events_first_local WHERE value_1 >= 30 AND value_1 <= 40)
AND user_id IN (SELECT user_id FROM raw_events_first_local WHERE value_1 >= 50 AND value_1 <= 60);
TRUNCATE agg_events;
INSERT INTO agg_events (user_id)
SELECT DISTINCT user_id
FROM raw_events_first
WHERE user_id IN (SELECT user_id FROM raw_events_first WHERE value_1 >= 10 AND value_1 <= 20)
AND user_id IN (SELECT user_id FROM raw_events_first WHERE value_1 >= 30 AND value_1 <= 40)
AND user_id IN (SELECT user_id FROM raw_events_first WHERE value_1 >= 50 AND value_1 <= 60);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE ignitionOneQ2)
UNION ALL
(TABLE ignitionOneQ2 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- IgnitionOne Q3: Count the number of distinct users who are in at least two of X and Y and Z segments
------------------------------------
------------------------------------
CREATE TEMP TABLE ignitionOneQ3 AS
SELECT user_id
FROM raw_events_first_local
WHERE (value_1 = 10
OR value_1 = 11
OR value_1 = 12)
GROUP BY user_id
HAVING count(distinct value_1) >= 2;
TRUNCATE agg_events;
INSERT INTO agg_events(user_id)
SELECT user_id
FROM raw_events_first
WHERE (value_1 = 10
OR value_1 = 11
OR value_1 = 12)
GROUP BY user_id
HAVING count(distinct value_1) >= 2;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE ignitionOneQ3)
UNION ALL
(TABLE ignitionOneQ3 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- IgnitionOne Q4: Count the number of distinct users who are in segments X and Y, but not in segment Z
---- NOT SUPPORTED DUE TO NOT IN -------------------
------------------------------------
------------------------------------
CREATE TEMP TABLE ignitionOneQ3 AS
SELECT user_id
FROM raw_events_first_local
WHERE (value_1 = 10
OR value_1 = 11)
AND user_id NOT IN (select user_id from raw_events_first_local where value_1 = 12)
GROUP BY user_id
HAVING count(distinct value_1) = 2;
INSERT INTO agg_events (user_id)
SELECT user_id
FROM raw_events_first
WHERE (value_1 = 10
OR value_1 = 11)
AND user_id NOT IN (select user_id from raw_events_first where value_1 = 12)
GROUP BY user_id
HAVING count(distinct value_1) = 2;
------------------------------------
------------------------------------
-- OMetria Q1: Find customers who have done X, and satisfy other customer specific criteria
------------------------------------
------------------------------------
CREATE TEMP TABLE OMetriaQ1 AS
SELECT user_id, value_2 FROM raw_events_first_local WHERE
value_1 = 100
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first_local.user_id);
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_2_agg)
SELECT user_id, value_2 FROM raw_events_first WHERE
value_1 = 100
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first.user_id);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ1)
UNION ALL
(TABLE OMetriaQ1 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- OMetria Q2: Customers who haven’t done X, and satisfy other customer specific criteria
------------------------------------
------------------------------------
CREATE TEMP TABLE OMetriaQ2 AS
SELECT user_id, value_2 FROM raw_events_first_local WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first_local.user_id);
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_2_agg)
SELECT user_id, value_2 FROM raw_events_first WHERE
value_1 = 101
AND value_2 >= 5
AND NOT EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first.user_id);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ2)
UNION ALL
(TABLE OMetriaQ2 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- OMetria Q3: Customers who have done X and Y, and satisfy other customer specific criteria
------------------------------------
------------------------------------
CREATE TEMP TABLE OMetriaQ3 AS
SELECT user_id, value_2 FROM raw_events_first_local WHERE
value_1 = 100
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first_local.user_id)
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first_local.user_id);
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_2_agg)
SELECT user_id, value_2 FROM raw_events_first WHERE
value_1 = 100
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first.user_id)
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first.user_id);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ3)
UNION ALL
(TABLE OMetriaQ3 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- OMetria Q4: Customers who have done X and haven’t done Y, and satisfy other customer specific criteria
------------------------------------
------------------------------------
CREATE TEMP TABLE OMetriaQ4 AS
SELECT user_id, value_2 FROM raw_events_first_local WHERE
value_1 = 101
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first_local.user_id)
AND NOT EXISTS (SELECT user_id FROM raw_events_second_local WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first_local.user_id);
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_2_agg)
SELECT user_id, value_2 FROM raw_events_first WHERE
value_1 = 101
AND value_2 >= 5
AND EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=100 AND value_3 > 100 AND user_id=raw_events_first.user_id)
AND NOT EXISTS (SELECT user_id FROM raw_events_second WHERE value_1=101 AND value_3 > 100 AND user_id=raw_events_first.user_id);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ4)
UNION ALL
(TABLE OMetriaQ4 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- OMetria Q5: Customers who have done X more than 10 times, and satisfy other customer specific criteria
------------------------------------
------------------------------------
CREATE temp TABLE ometriaq5 AS
SELECT user_id,
value_2
FROM raw_events_first_local
WHERE value_1 > 100
AND value_1 < 124
AND value_2 >= 5
AND EXISTS (SELECT user_id
FROM raw_events_second_local
WHERE value_1 > 100
AND value_1 < 124
AND value_3 > 100
AND user_id = raw_events_first_local.user_id
GROUP BY user_id
HAVING Count(*) > 2);
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_2_agg)
SELECT user_id,
value_2
FROM raw_events_first
WHERE value_1 > 100
AND value_1 < 124
AND value_2 >= 5
AND EXISTS (SELECT user_id
FROM raw_events_second
WHERE value_1 > 100
AND value_1 < 124
AND value_3 > 100
AND user_id = raw_events_first.user_id
GROUP BY user_id
HAVING Count(*) > 2);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_2_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE OMetriaQ5)
UNION ALL
(TABLE OMetriaQ5 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- Segment.com Q1: Find me all users who logged in more than once
------------------------------------
------------------------------------
CREATE TEMP TABLE segmentQ1 AS
SELECT user_id, value_1 from
(
SELECT user_id, value_1 From raw_events_first
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1
) as a;
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_1_agg)
SELECT user_id, value_1 from
(
SELECT user_id, value_1 From raw_events_first
WHERE value_2 > 100 and user_id = 15 GROUP BY value_1, user_id HAVING count(*) > 1
) as a;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE segmentQ1)
UNION ALL
(TABLE segmentQ1 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- Segment.com Q2: Find me all users for whom the referrer was ‘google’ and which pages they visited
-- NO SUBQUERIES -- WE ALREADY SUPPORT
------------------------------------
------------------------------------
-- trivial join query
------------------------------------
------------------------------------
-- Segment.com Q3: Find me all users who were referred by google and who viewed the nps survey
------------------------------------
------------------------------------
CREATE TEMP TABLE segmentQ3 AS
Select user_id
From raw_events_second_local
Where value_1 = 156
And value_2 > 500
And user_id in
(select user_id
From raw_events_first_local
Where value_1 = 156
And value_2 > 250);
TRUNCATE agg_events;
INSERT INTO agg_events(user_id)
Select user_id
From raw_events_second
Where value_1 = 156
And value_2 > 500
And user_id in
(select user_id
From raw_events_first
Where value_1 = 156
And value_2 > 250);
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE segmentQ3)
UNION ALL
(TABLE segmentQ3 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- Segment.com Q4: Which events did people who were referred to by google do (For the gy2d project)
------------------------------------
------------------------------------
CREATE TEMP TABLE segmentQ4 AS
SELECT user_id, value_1 FROM raw_events_first_local
WHERE user_id in (SELECT user_id from raw_events_second_local WHERE value_2 > 5000 and value_2 < 5005)
GROUP BY user_id, value_1;
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_1_agg)
SELECT user_id, value_1 FROM raw_events_first
WHERE user_id in (SELECT user_id from raw_events_second WHERE value_2 > 5000 and value_2 < 5005)
GROUP BY user_id, value_1;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE segmentQ4)
UNION ALL
(TABLE segmentQ4 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- Segment.com Q5: Find me all the users who logged in more than three times (across all projects)
------------------------------------
------------------------------------
-- a trivial query Citus already handles wihtout subqueries
------------------------------------
------------------------------------
-- Clarifai Q1: Find me all the users who logged in more than three times (across all projects)
------------------------------------
------------------------------------
CREATE TEMP TABLE clarifiaq1 AS
SELECT
raw_events_first_local.user_id, raw_events_first_local.value_1, prob
FROM
raw_events_first_local
JOIN
(SELECT
ma.user_id,
(GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM raw_events_first_local AS ma, raw_events_second_local as short_list
WHERE short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.value_1 < 50
-- ORDER BY prob DESC
-- OFFSET 0
-- LIMIT 50
) temp ON raw_events_first_local.user_id = temp.user_id WHERE raw_events_first_local.value_1 < 50;
TRUNCATE agg_events;
INSERT INTO agg_events(user_id, value_1_agg, value_3_agg)
SELECT
raw_events_first.user_id, raw_events_first.value_1, prob
FROM
raw_events_first
JOIN
(SELECT
ma.user_id,
(GREATEST(coalesce(ma.value_4 / 250, 0.0) + GREATEST(1.0))) / 2 AS prob
FROM raw_events_first AS ma, raw_events_second as short_list
WHERE short_list.user_id = ma.user_id and ma.value_1 < 50 and short_list.value_1 < 50
-- ORDER BY prob DESC
-- OFFSET 0
-- LIMIT 50
) temp ON raw_events_first.user_id = temp.user_id WHERE raw_events_first.value_1 < 50;
DROP TABLE IF EXISTS agg_events_temp;
CREATE TEMP TABLE agg_events_temp AS SELECT user_id, value_1_agg, value_3_agg FROM agg_events;
(TABLE agg_events_temp EXCEPT TABLE clarifiaq1)
UNION ALL
(TABLE clarifiaq1 EXCEPT TABLE agg_events_temp);
------------------------------------
------------------------------------
-- CloudWP Q1: Find me all the users who logged in more than three times (across all projects)
------------------------------------
------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment