Skip to content

Instantly share code, notes, and snippets.

@marcocitus
Last active May 1, 2016 22:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marcocitus/733cf0188354bc6bff01394b2f2881b0 to your computer and use it in GitHub Desktop.
Save marcocitus/733cf0188354bc6bff01394b2f2881b0 to your computer and use it in GitHub Desktop.
Retention query using Citus
CREATE OR REPLACE FUNCTION consecutive_date_pattern(start_date date, num_days int)
RETURNS text[]
AS $BODY$
DECLARE
result text[];
BEGIN
SELECT array_agg('date=>"'||d::date||'"') INTO result
FROM generate_series(start_date, start_date + num_days * interval '1 day', '1 day') d;
RETURN result;
END;
$BODY$ LANGUAGE plpgsql;
SET citus.subquery_pushdown TO on;
-- Get the number of users who visited on 2016-04-07 and visited again
-- for X days in a row.
SELECT
start_date,
sum(contains_elements(events, ARRAY['date=>"'||start_date||'"'])) num_users,
sum(contains_elements(events, consecutive_date_pattern(start_date, 1))) day_1_repeat,
sum(contains_elements(events, consecutive_date_pattern(start_date, 2))) day_2_repeat,
sum(contains_elements(events, consecutive_date_pattern(start_date, 3))) day_3_repeat,
sum(contains_elements(events, consecutive_date_pattern(start_date, 4))) day_4_repeat,
sum(contains_elements(events, consecutive_date_pattern(start_date, 5))) day_5_repeat,
sum(contains_elements(events, consecutive_date_pattern(start_date, 6))) day_6_repeat
FROM (
SELECT * FROM (
(SELECT udid, recorddate::date AS start_date
FROM sessions
WHERE recorddate >= date '2016-04-07'
GROUP BY 1, 2
) user_dates
JOIN
(SELECT udid, array_agg(hstore('date', recorddate::text)) events
FROM (
SELECT udid, recorddate::date
FROM sessions
WHERE recorddate >= date '2016-04-07'
GROUP BY 1, 2 ORDER BY 1, 2
) user_dates
GROUP BY udid
) user_hstore_dates
USING(udid)
) user_hstore_date
ORDER BY udid) r
GROUP BY start_date
ORDER BY start_date;
CREATE OR REPLACE FUNCTION repeat_date_pattern(start_date date, num_days int)
RETURNS text[]
AS $BODY$
BEGIN
RETURN ARRAY['date=>"'||start_date||'"',
'date=>"'|| (start_date + num_days * interval '1 day')::date || '"'];
END;
$BODY$ LANGUAGE plpgsql;
SET citus.subquery_pushdown TO on;
-- Get the number of users who visited on 2016-04-07 and visited again
-- X days later, including those that already visited <X days later.
SELECT
start_date,
sum(contains_elements(events, ARRAY['date=>"'||start_date||'"'])) num_users,
sum(contains_elements(events, repeat_date_pattern(start_date, 1))) day_1_repeat,
sum(contains_elements(events, repeat_date_pattern(start_date, 2))) day_2_repeat,
sum(contains_elements(events, repeat_date_pattern(start_date, 3))) day_3_repeat,
sum(contains_elements(events, repeat_date_pattern(start_date, 4))) day_4_repeat,
sum(contains_elements(events, repeat_date_pattern(start_date, 5))) day_5_repeat,
sum(contains_elements(events, repeat_date_pattern(start_date, 6))) day_6_repeat
FROM (
SELECT * FROM (
(SELECT udid, recorddate::date AS start_date
FROM sessions
WHERE recorddate >= date '2016-04-07'
GROUP BY 1, 2
) user_dates
JOIN
(SELECT udid, array_agg(hstore('date', recorddate::text)) events
FROM (
SELECT udid, recorddate::date
FROM sessions
WHERE recorddate >= date '2016-04-07'
GROUP BY 1, 2 ORDER BY 1, 2
) user_dates
GROUP BY udid
) user_hstore_dates
USING(udid)
) user_hstore_date
ORDER BY udid) r
GROUP BY start_date
ORDER BY start_date;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment