Skip to content

Instantly share code, notes, and snippets.

@figital
Forked from wolever/example.sql
Created April 18, 2017 20:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save figital/2804205f3a10e110850c5af1fe5f9ece to your computer and use it in GitHub Desktop.
Save figital/2804205f3a10e110850c5af1fe5f9ece to your computer and use it in GitHub Desktop.
A simple Postgres aggregate function for calculating a trimmed mean, excluding values outside N standard deviations from the mean: `tmean(v, standard_deviations)` (for example: `tmean(rating, 1.75)`).
DROP TABLE IF EXISTS foo;
CREATE TEMPORARY TABLE foo (x FLOAT);
INSERT INTO foo VALUES (1);
INSERT INTO foo VALUES (2);
INSERT INTO foo VALUES (3);
INSERT INTO foo VALUES (4);
INSERT INTO foo VALUES (100);
SELECT avg(x), tmean(x, 2.0), tmean(x, 1.5) FROM foo;
-- avg | tmean | tmean
-- -----+-------+-------
-- 22 | 22 | 2.5
DROP TYPE IF EXISTS tmean_stype CASCADE;
CREATE TYPE tmean_stype AS (
deviations FLOAT,
count INT,
acc FLOAT,
acc2 FLOAT,
vals FLOAT[]
);
CREATE OR REPLACE FUNCTION tmean_sfunc(tmean_stype, float, float)
RETURNS tmean_stype AS $$
SELECT $3, $1.count + 1, $1.acc + $2, $1.acc2 + ($2 * $2), array_append($1.vals, $2);
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION tmean_finalfunc(tmean_stype)
RETURNS float AS $$
DECLARE
fcount INT;
facc FLOAT;
mean FLOAT;
stddev FLOAT;
lbound FLOAT;
ubound FLOAT;
val FLOAT;
BEGIN
mean := $1.acc / $1.count;
stddev := sqrt(($1.acc2 / $1.count) - (mean * mean));
lbound := mean - stddev * $1.deviations;
ubound := mean + stddev * $1.deviations;
-- RAISE NOTICE 'mean: % stddev: % lbound: % ubound: %', mean, stddev, lbound, ubound;
fcount := 0;
facc := 0;
FOR i IN array_lower($1.vals, 1) .. array_upper($1.vals, 1) LOOP
val := $1.vals[i];
IF val >= lbound AND val <= ubound THEN
fcount := fcount + 1;
facc := facc + val;
END IF;
END LOOP;
IF fcount = 0 THEN
return NULL;
END IF;
RETURN facc / fcount;
END;
$$ LANGUAGE plpgsql;
CREATE AGGREGATE tmean(float, float)
(
SFUNC = tmean_sfunc,
STYPE = tmean_stype,
FINALFUNC = tmean_finalfunc,
INITCOND = '(-1, 0, 0, 0, {})'
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment