Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Given a min, max, and number of bins, compute a histogram of a numeric column.
CREATE OR REPLACE FUNCTION hist_sfunc (state INTEGER[], val REAL, min REAL, max REAL, nbuckets INTEGER) RETURNS INTEGER[] AS $$
DECLARE
bucket INTEGER;
i INTEGER;
BEGIN
bucket := width_bucket(val, min, max, nbuckets - 1) - 1;
IF state[0] IS NULL THEN
FOR i IN SELECT * FROM generate_series(0, nbuckets - 1) LOOP
state[i] := 0;
END LOOP;
END IF;
state[bucket] = state[bucket] + 1;
RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
DROP AGGREGATE IF EXISTS histogram (REAL, REAL, REAL, INTEGER);
CREATE AGGREGATE histogram (REAL, REAL, REAL, INTEGER) (
SFUNC = hist_sfunc,
STYPE = INTEGER[]
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.