Last active
March 31, 2022 13:20
-
-
Save achad4/549683bbb6326f00477ab6184203522f to your computer and use it in GitHub Desktop.
Ex. Flagging outliers based on Z-scores
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH transaction AS ( | |
SELECT transaction_id, | |
customer_id, | |
state, | |
amount_spent_usd | |
FROM FACT_TABLE_TRANSACTION | |
), | |
customer_spend AS ( | |
SELECT customer_id, | |
state, | |
COUNT(1) AS n_transactions, | |
AVG(amount_spent_usd) AS avg_customer_spend, | |
AVG(LN(amount_spent_usd)) AS log_avg_customer_spend | |
FROM transaction | |
GROUP BY 1, 2 | |
), | |
state_spend AS ( | |
SELECT state, | |
COUNT(DISTINCT customer_id) AS n_customers, | |
COUNT(1) AS n_transactions, | |
AVG(amount_spent_usd) AS avg_state_spend, | |
AVG(LN(amount_spent_usd)) AS log_avg_state_spend, | |
STDDEV(amount_spent_usd) AS std_spend, | |
STDDEV(LN(amount_spent_usd)) AS log_std_spend | |
FROM transaction | |
GROUP BY 1, 2 | |
), | |
customer_z_score AS ( | |
SELECT *, | |
(c.avg_customer_spend - s.avg_state_spend) / NULLIFZERO(s.std_spend) AS customer_z_score, | |
(c.log_avg_customer_spend - s.log_avg_state_spend) / NULLIFZERO(s.log_std_spend) AS log_customer_z_score | |
FROM customer_spend c | |
INNER JOIN state_spend s USING (SIDECAR_CODE) | |
) | |
SELECT customer_id, | |
customer_z_score, | |
avg_customer_spend | |
FROM customer_spend c | |
INNER JOIN customer_z_score USING (customer_id) | |
WHERE ABS(z_score) > 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment