Created
April 17, 2014 16:06
-
-
Save abelsonlive/10994675 to your computer and use it in GitHub Desktop.
Given a min, max, and number of bins, compute a histogram of a numeric column.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION hist_sfunc (state INTEGER[], val REAL, min REAL, max REAL, nbuckets INTEGER) RETURNS INTEGER[] AS $$ | |
DECLARE | |
bucket INTEGER; | |
i INTEGER; | |
BEGIN | |
bucket := width_bucket(val, min, max, nbuckets - 1) - 1; | |
IF state[0] IS NULL THEN | |
FOR i IN SELECT * FROM generate_series(0, nbuckets - 1) LOOP | |
state[i] := 0; | |
END LOOP; | |
END IF; | |
state[bucket] = state[bucket] + 1; | |
RETURN state; | |
END; | |
$$ LANGUAGE plpgsql IMMUTABLE; | |
DROP AGGREGATE IF EXISTS histogram (REAL, REAL, REAL, INTEGER); | |
CREATE AGGREGATE histogram (REAL, REAL, REAL, INTEGER) ( | |
SFUNC = hist_sfunc, | |
STYPE = INTEGER[] | |
); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment