Skip to content

Instantly share code, notes, and snippets.

@chespinoza
Last active February 2, 2023 13:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chespinoza/12f2523ef3ea103d62faa152c03e8411 to your computer and use it in GitHub Desktop.
Save chespinoza/12f2523ef3ea103d62faa152c03e8411 to your computer and use it in GitHub Desktop.
WITH
preprocessed_orders AS (
SELECT
*
FROM (
SELECT
nozzle_customer_name,
customer_id,
amazon_order_id,
item_promotion_discount,
item_price,
quantity,
DATETIME(TIMESTAMP(purchase_date), "America/Los_Angeles") AS purchase_date,
DATE_TRUNC(DATETIME(TIMESTAMP(purchase_date), "America/Los_Angeles"), month) AS purchase_window,
asin,
-- "value" column is necessary to calculate the "highest value" for NTB.
-- Find more context about it in the "ntb" CTE comments.
quantity * item_price AS value
FROM
`nozzle.rotterdam.combined_orders`
WHERE
nozzle_customer_name = "A22QNUQNA4QHN8" -- Wake - unybrands
AND marketplace_id = "ATVPDKIKX0DER"
AND customer_id IS NOT NULL )
WHERE
purchase_window < DATE_TRUNC("2023-01-01", month) ),
-- NTB = New To brand
--
-- This actually means:
-- "The first order a customer made with a given Amazon merchant."
--
-- If there is more than 1 ASIN on such "first order", then a specific ASIN
-- with the "highest value" is considered an NTB. And the formula for value is:
--
-- quantity * NON-DISCOUNTED item price
--
-- As a result of the logic described above, there can be only one ASIN
-- considered as NTB.
ntb AS (
-- What's important in the context of the LTV query, a customer is
-- NTB for the whole period. So if a customer made their first order
-- on 2022-01-01, it is considered NTB for the entire 2022-01 period.
-- That's also why ntb CTE should return both customer_id and period.
SELECT
DISTINCT nozzle_customer_name,
customer_id,
purchase_date,
cohort
FROM (
SELECT
nozzle_customer_name,
customer_id,
purchase_date,
purchase_window AS cohort,
asin,
RANK() OVER(PARTITION BY customer_id ORDER BY purchase_date, value DESC) AS ntb_rank
FROM
preprocessed_orders ) AS p -- The "p" alias is necessary to make the filter clause work in both of its
-- query contexts. The gist is that otherwise the "asin" column would be
-- ambiguous. See the second {asin_filter} call to understand why fully.
WHERE
ntb_rank = 1
AND cohort >= DATE_TRUNC("2022-07-01", month)
AND cohort <= DATE_TRUNC("2023-01-01", month) ),
second_purchases AS (
-- This query returns all the 2nd purchases per customer id
-- now I need to count and group by cohort
SELECT
customer_id,
amazon_order_id,
asin,
cohort
FROM (
SELECT
po.customer_id AS customer_id,
po.amazon_order_id AS amazon_order_id,
po.asin AS asin,
ntb.cohort AS cohort,
RANK() OVER(PARTITION BY po.customer_id ORDER BY po.purchase_date ASC) AS second_purchase_rank
FROM
preprocessed_orders po
JOIN
ntb
ON
ntb.customer_id = po.customer_id
WHERE
po.purchase_date > ntb.purchase_date )
WHERE
second_purchase_rank = 1 ),
repeat_customers AS (
SELECT
cohort,
COUNT(*) AS repeat_customers
FROM
second_purchases
GROUP BY
cohort
ORDER BY
cohort ),
total_customers AS (
SELECT
po.purchase_window AS cohort,
COUNT(*) AS total_customers
FROM (
SELECT
po.*,
ROW_NUMBER() OVER (PARTITION BY po.customer_id ORDER BY po.purchase_date ASC) AS seqnum
FROM
preprocessed_orders po) po
WHERE
po.seqnum =1
AND po.purchase_window >= DATE_TRUNC("2022-07-01", month)
AND po.purchase_window <= DATE_TRUNC("2023-01-01", month)
GROUP BY
po.purchase_window
ORDER BY
po.purchase_window )
SELECT
tc.cohort,
tc.total_customers,
rc.repeat_customers,
SAFE_DIVIDE(rc.repeat_customers, tc.total_customers) * 100 AS retention_rate,
FROM
total_customers tc
JOIN
repeat_customers rc
ON
tc.cohort = rc.cohort
ORDER BY
tc.cohort
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment