Skip to content

Instantly share code, notes, and snippets.

@abelsonlive
Created April 17, 2014 16:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save abelsonlive/10994675 to your computer and use it in GitHub Desktop.
Save abelsonlive/10994675 to your computer and use it in GitHub Desktop.
Given a min, max, and number of bins, compute a histogram of a numeric column.
CREATE OR REPLACE FUNCTION hist_sfunc (state INTEGER[], val REAL, min REAL, max REAL, nbuckets INTEGER) RETURNS INTEGER[] AS $$
DECLARE
bucket INTEGER;
i INTEGER;
BEGIN
bucket := width_bucket(val, min, max, nbuckets - 1) - 1;
IF state[0] IS NULL THEN
FOR i IN SELECT * FROM generate_series(0, nbuckets - 1) LOOP
state[i] := 0;
END LOOP;
END IF;
state[bucket] = state[bucket] + 1;
RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
DROP AGGREGATE IF EXISTS histogram (REAL, REAL, REAL, INTEGER);
CREATE AGGREGATE histogram (REAL, REAL, REAL, INTEGER) (
SFUNC = hist_sfunc,
STYPE = INTEGER[]
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment