Skip to content

Instantly share code, notes, and snippets.

@achad4
Last active October 20, 2023 12:32
Show Gist options
  • Save achad4/917ed47cc1fd237e05e057b6487dd6a3 to your computer and use it in GitHub Desktop.
Save achad4/917ed47cc1fd237e05e057b6487dd6a3 to your computer and use it in GitHub Desktop.
Ex. Flagging outliers in SQL
WITH transaction AS (
SELECT transaction_id,
customer_id,
state,
amount_spent_usd
FROM < FACT_TABLE_TRANSACTION >
),
customer_spend AS (
SELECT customer_id,
state,
COUNT(1) AS n_transactions,
AVG(amount_spent_usd) AS avg_customer_spend,
AVG(LN(amount_spent_usd)) AS log_avg_customer_spend
FROM transaction
GROUP BY 1, 2
),
state_spend AS (
SELECT state,
COUNT(DISTINCT customer_id) AS n_customers,
COUNT(1) AS n_transactions,
AVG(amount_spent_usd) AS avg_state_spend,
AVG(LN(amount_spent_usd)) AS log_avg_state_spend,
STDDEV(amount_spent_usd) AS std_spend,
STDDEV(LN(amount_spent_usd)) AS log_std_spend
FROM transaction
GROUP BY 1
),
customer_z_score AS (
SELECT *,
(c.avg_customer_spend - s.avg_state_spend) / NULLIFZERO(s.std_spend) AS customer_z_score,
(c.log_avg_customer_spend - s.log_avg_state_spend) / NULLIFZERO(s.log_std_spend) AS log_customer_z_score
FROM customer_spend c
INNER JOIN state_spend s USING (state)
)
SELECT customer_id,
log_customer_z_score,
avg_customer_spend
FROM customer_spend c
INNER JOIN customer_z_score USING (customer_id)
WHERE ABS(log_customer_z_score) > 2
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment