Skip to content

Instantly share code, notes, and snippets.

@achad4
Last active March 31, 2022 13:20
Show Gist options
  • Save achad4/549683bbb6326f00477ab6184203522f to your computer and use it in GitHub Desktop.
Save achad4/549683bbb6326f00477ab6184203522f to your computer and use it in GitHub Desktop.
Ex. Flagging outliers based on Z-scores
WITH transaction AS (
SELECT transaction_id,
customer_id,
state,
amount_spent_usd
FROM FACT_TABLE_TRANSACTION
),
customer_spend AS (
SELECT customer_id,
state,
COUNT(1) AS n_transactions,
AVG(amount_spent_usd) AS avg_customer_spend,
AVG(LN(amount_spent_usd)) AS log_avg_customer_spend
FROM transaction
GROUP BY 1, 2
),
state_spend AS (
SELECT state,
COUNT(DISTINCT customer_id) AS n_customers,
COUNT(1) AS n_transactions,
AVG(amount_spent_usd) AS avg_state_spend,
AVG(LN(amount_spent_usd)) AS log_avg_state_spend,
STDDEV(amount_spent_usd) AS std_spend,
STDDEV(LN(amount_spent_usd)) AS log_std_spend
FROM transaction
GROUP BY 1, 2
),
customer_z_score AS (
SELECT *,
(c.avg_customer_spend - s.avg_state_spend) / NULLIFZERO(s.std_spend) AS customer_z_score,
(c.log_avg_customer_spend - s.log_avg_state_spend) / NULLIFZERO(s.log_std_spend) AS log_customer_z_score
FROM customer_spend c
INNER JOIN state_spend s USING (SIDECAR_CODE)
)
SELECT customer_id,
customer_z_score,
avg_customer_spend
FROM customer_spend c
INNER JOIN customer_z_score USING (customer_id)
WHERE ABS(z_score) > 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment