Skip to content

Instantly share code, notes, and snippets.

@jj0hns0n
Created June 3, 2025 21:27
Show Gist options
  • Save jj0hns0n/3bd083b55c8d1713945e9726a272cd6d to your computer and use it in GitHub Desktop.
Save jj0hns0n/3bd083b55c8d1713945e9726a272cd6d to your computer and use it in GitHub Desktop.
-- Step 1: Compute summary stats per country and metric
WITH base_stats AS (
SELECT
country,
'Physical' AS metric,
MIN(physical) AS min,
MAX(physical) AS max,
MAX(physical) - MIN(physical) AS range,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY physical) AS q1,
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY physical) AS median,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY physical) AS q3,
STDDEV(physical) AS stddev
FROM geosure_v5_20250603_443
GROUP BY country
UNION ALL
SELECT
country,
'Theft' AS metric,
MIN(theft),
MAX(theft),
MAX(theft) - MIN(theft),
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY theft),
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY theft),
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY theft),
STDDEV(theft)
FROM geosure_v5_20250603_443
GROUP BY country
),
-- Step 2: Count outliers for physical
physical_outliers AS (
SELECT
t.country,
COUNT(*) AS outlier_count
FROM geosure_v5_20250603_443 t
JOIN (
SELECT
country,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY physical) AS q1,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY physical) AS q3
FROM geosure_v5_20250603_443
GROUP BY country
) q ON t.country = q.country
WHERE t.physical < q.q1 - 1.5 * (q.q3 - q.q1)
OR t.physical > q.q3 + 1.5 * (q.q3 - q.q1)
GROUP BY t.country
),
-- Step 3: Count outliers for theft
theft_outliers AS (
SELECT
t.country,
COUNT(*) AS outlier_count
FROM geosure_v5_20250603_443 t
JOIN (
SELECT
country,
PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY theft) AS q1,
PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY theft) AS q3
FROM geosure_v5_20250603_443
GROUP BY country
) q ON t.country = q.country
WHERE t.theft < q.q1 - 1.5 * (q.q3 - q.q1)
OR t.theft > q.q3 + 1.5 * (q.q3 - q.q1)
GROUP BY t.country
)
-- Step 4: Combine stats with outliers
SELECT
bs.country,
bs.metric,
ROUND(bs.min, 2) AS min,
ROUND(bs.max, 2) AS max,
ROUND(bs.range, 2) AS range,
ROUND(CAST(bs.q1 AS numeric), 2) AS q1,
ROUND(CAST(bs.median AS numeric), 2) AS median,
ROUND(CAST(bs.q3 AS numeric), 2) AS q3,
ROUND(bs.stddev, 2) AS stddev,
COALESCE(po.outlier_count, to_.outlier_count, 0) AS outliers
FROM base_stats bs
LEFT JOIN physical_outliers po ON bs.country = po.country AND bs.metric = 'Physical'
LEFT JOIN theft_outliers to_ ON bs.country = to_.country AND bs.metric = 'Theft'
ORDER BY bs.country, bs.metric;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment