Last active
October 20, 2023 12:32
-
-
Save achad4/917ed47cc1fd237e05e057b6487dd6a3 to your computer and use it in GitHub Desktop.
Ex. Flagging outliers in SQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH transaction AS ( | |
SELECT transaction_id, | |
customer_id, | |
state, | |
amount_spent_usd | |
FROM < FACT_TABLE_TRANSACTION > | |
), | |
customer_spend AS ( | |
SELECT customer_id, | |
state, | |
COUNT(1) AS n_transactions, | |
AVG(amount_spent_usd) AS avg_customer_spend, | |
AVG(LN(amount_spent_usd)) AS log_avg_customer_spend | |
FROM transaction | |
GROUP BY 1, 2 | |
), | |
state_spend AS ( | |
SELECT state, | |
COUNT(DISTINCT customer_id) AS n_customers, | |
COUNT(1) AS n_transactions, | |
AVG(amount_spent_usd) AS avg_state_spend, | |
AVG(LN(amount_spent_usd)) AS log_avg_state_spend, | |
STDDEV(amount_spent_usd) AS std_spend, | |
STDDEV(LN(amount_spent_usd)) AS log_std_spend | |
FROM transaction | |
GROUP BY 1 | |
), | |
customer_z_score AS ( | |
SELECT *, | |
(c.avg_customer_spend - s.avg_state_spend) / NULLIFZERO(s.std_spend) AS customer_z_score, | |
(c.log_avg_customer_spend - s.log_avg_state_spend) / NULLIFZERO(s.log_std_spend) AS log_customer_z_score | |
FROM customer_spend c | |
INNER JOIN state_spend s USING (state) | |
) | |
SELECT customer_id, | |
log_customer_z_score, | |
avg_customer_spend | |
FROM customer_spend c | |
INNER JOIN customer_z_score USING (customer_id) | |
WHERE ABS(log_customer_z_score) > 2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment