Created
June 3, 2025 21:27
-
-
Save jj0hns0n/3bd083b55c8d1713945e9726a272cd6d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Step 1: Compute summary stats per country and metric | |
WITH base_stats AS ( | |
SELECT | |
country, | |
'Physical' AS metric, | |
MIN(physical) AS min, | |
MAX(physical) AS max, | |
MAX(physical) - MIN(physical) AS range, | |
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY physical) AS q1, | |
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY physical) AS median, | |
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY physical) AS q3, | |
STDDEV(physical) AS stddev | |
FROM geosure_v5_20250603_443 | |
GROUP BY country | |
UNION ALL | |
SELECT | |
country, | |
'Theft' AS metric, | |
MIN(theft), | |
MAX(theft), | |
MAX(theft) - MIN(theft), | |
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY theft), | |
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY theft), | |
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY theft), | |
STDDEV(theft) | |
FROM geosure_v5_20250603_443 | |
GROUP BY country | |
), | |
-- Step 2: Count outliers for physical | |
physical_outliers AS ( | |
SELECT | |
t.country, | |
COUNT(*) AS outlier_count | |
FROM geosure_v5_20250603_443 t | |
JOIN ( | |
SELECT | |
country, | |
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY physical) AS q1, | |
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY physical) AS q3 | |
FROM geosure_v5_20250603_443 | |
GROUP BY country | |
) q ON t.country = q.country | |
WHERE t.physical < q.q1 - 1.5 * (q.q3 - q.q1) | |
OR t.physical > q.q3 + 1.5 * (q.q3 - q.q1) | |
GROUP BY t.country | |
), | |
-- Step 3: Count outliers for theft | |
theft_outliers AS ( | |
SELECT | |
t.country, | |
COUNT(*) AS outlier_count | |
FROM geosure_v5_20250603_443 t | |
JOIN ( | |
SELECT | |
country, | |
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY theft) AS q1, | |
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY theft) AS q3 | |
FROM geosure_v5_20250603_443 | |
GROUP BY country | |
) q ON t.country = q.country | |
WHERE t.theft < q.q1 - 1.5 * (q.q3 - q.q1) | |
OR t.theft > q.q3 + 1.5 * (q.q3 - q.q1) | |
GROUP BY t.country | |
) | |
-- Step 4: Combine stats with outliers | |
SELECT | |
bs.country, | |
bs.metric, | |
ROUND(bs.min, 2) AS min, | |
ROUND(bs.max, 2) AS max, | |
ROUND(bs.range, 2) AS range, | |
ROUND(CAST(bs.q1 AS numeric), 2) AS q1, | |
ROUND(CAST(bs.median AS numeric), 2) AS median, | |
ROUND(CAST(bs.q3 AS numeric), 2) AS q3, | |
ROUND(bs.stddev, 2) AS stddev, | |
COALESCE(po.outlier_count, to_.outlier_count, 0) AS outliers | |
FROM base_stats bs | |
LEFT JOIN physical_outliers po ON bs.country = po.country AND bs.metric = 'Physical' | |
LEFT JOIN theft_outliers to_ ON bs.country = to_.country AND bs.metric = 'Theft' | |
ORDER BY bs.country, bs.metric; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment