Skip to content

Instantly share code, notes, and snippets.

@bassemawhoob
Last active April 12, 2024 18:15
Show Gist options
  • Save bassemawhoob/91df0d2dc12239cb0f860662d0a615b9 to your computer and use it in GitHub Desktop.
Save bassemawhoob/91df0d2dc12239cb0f860662d0a615b9 to your computer and use it in GitHub Desktop.
Revenue Retention Cohort Analysis - PostgreSQL
-- Based on: https://medium.com/quick-code/how-to-write-sql-to-calculate-user-cohort-retention-a1b3b57c7a2f
-- Uses DATE_PART instead of DATE_DIFF as it is not supported by PostgreSQL
-- (branch_id, scheduled_for, total_cents)
WITH activities AS (
SELECT branch_id, scheduled_for, total_cents
FROM orders
WHERE orders.status = 'complete'
),
-- (branch_id, cohort_month): cohort month is the first order date
cohort_items AS (
SELECT branch_id, date_trunc('MONTH', MIN(scheduled_for))::DATE AS cohort_month
FROM activities
GROUP BY branch_id
ORDER BY 1, 2
),
-- (branch_id, month_number, revenue): branch X has activity in month number X
branch_orders AS (
SELECT A.branch_id, (DATE_PART('year', C.cohort_month) - DATE_PART('year', A.scheduled_for)) * 12 +
(DATE_PART('month', C.cohort_month) - DATE_PART('month', A.scheduled_for)) AS month_number, SUM(A.total_cents) as revenue
FROM activities A
LEFT JOIN cohort_items C ON A.branch_id = C.branch_id
GROUP BY 1, 2
ORDER BY 1, 2
),
-- (cohort_month, count, revenue)
cohort_size AS (
SELECT cohort_month, COUNT(DISTINCT A.branch_id) AS num_branches, SUM(A.total_cents) as revenue
FROM cohort_items C
LEFT JOIN activities A ON A.branch_id = C.branch_id
GROUP BY 1
ORDER BY 1
),
-- (cohort_month, month_number, count, revenue)
retention_table AS (
SELECT C.cohort_month, A.month_number, COUNT(DISTINCT A.branch_id) AS num_branches, SUM(A.revenue) as revenue
FROM branch_orders A
LEFT JOIN cohort_items C ON A.branch_id = C.branch_id
GROUP BY 1, 2
)
-- final value: (cohort_month, size, month_number, total_revenue, repeators, percentage)
SELECT B.cohort_month, S.num_branches AS total_branches, B.month_number, B.revenue / 100 as total_revenue, B.num_branches AS total_repeators, B.revenue / S.revenue as percentage
FROM retention_table B
LEFT JOIN cohort_size S ON B.cohort_month = S.cohort_month
WHERE B.cohort_month IS NOT NULL
ORDER BY 1, 3 desc
@asifnushala
Copy link

could you please provide the example dataset so that people can understand easily.
Thanks

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment