Skip to content

Instantly share code, notes, and snippets.

@wolever
Last active April 19, 2023 20:28
Show Gist options
  • Star 27 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save wolever/9164392 to your computer and use it in GitHub Desktop.
Save wolever/9164392 to your computer and use it in GitHub Desktop.
Functions to create and draw histograms with PostgreSQL.
-- Functions to create and draw histograms with PostgreSQL.
--
-- psql# WITH email_lengths AS (
-- -# SELECT length(email) AS length
-- -# FROM auth_user
-- -# LIMIT 100
-- -# )
-- -# SELECT * FROM show_histogram((SELECT histogram(length, 0, 32, 6) FROM email_lengths))
-- bucket | range | count | bar | cumbar | cumsum | cumpct
-- --------+-------------------------------------+-------+--------------------------------+--------------------------------+--------+------------------------
-- 0 | [0,5.33333333333333) | 1 | | | 1 | 0.00273224043715846995
-- 1 | [5.33333333333333,10.6666666666667) | 5 | = | | 6 | 0.01639344262295081967
-- 2 | [10.6666666666667,16) | 149 | ============================== | ============= | 155 | 0.42349726775956284153
-- 3 | [16,21.3333333333333) | 145 | ============================= | ========================= | 300 | 0.81967213114754098361
-- 4 | [21.3333333333333,26.6666666666667) | 49 | ========== | ============================= | 349 | 0.95355191256830601093
-- 5 | [26.6666666666667,32) | 17 | === | ============================== | 366 | 1.00000000000000000000
-- (6 rows)
-- psql#
DROP TYPE IF EXISTS floatrange CASCADE;
CREATE TYPE floatrange AS RANGE (
subtype = float8,
subtype_diff = float8mi
);
DROP TYPE IF EXISTS histogram_result CASCADE;
CREATE TYPE histogram_result AS (
count INTEGER,
bucket INTEGER,
range floatrange
);
CREATE OR REPLACE FUNCTION hist_sfunc(state histogram_result[], val float8, min float8, max float8, nbuckets INTEGER) RETURNS histogram_result[] AS $$
DECLARE
bucket INTEGER;
width float8;
i INTEGER;
BEGIN
-- width_bucket uses nbuckets + 1 (!) and starts at 1.
bucket := width_bucket(val, min, max, nbuckets - 1) - 1;
-- Init the array with the correct number of 0's so the caller doesn't see NULLs
IF state[0] IS NULL THEN
width := (max - min) / nbuckets;
FOR i IN SELECT * FROM generate_series(0, nbuckets - 1) LOOP
state[i] := (0, i, floatrange(i * width, (i + 1) * width));
END LOOP;
END IF;
state[bucket] = (state[bucket].count + 1, state[bucket].bucket, state[bucket].range);
RETURN state;
END;
$$ LANGUAGE plpgsql IMMUTABLE;
CREATE AGGREGATE histogram(float8, float8, float8, INTEGER) (
SFUNC = hist_sfunc,
STYPE = histogram_result[]
);
CREATE OR REPLACE FUNCTION histobar(v float8, tick_size float8)
RETURNS TEXT AS $$
SELECT repeat('=', (v * tick_size)::integer);
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION show_histogram(h histogram_result[])
RETURNS TABLE(bucket INTEGER, range floatrange, count INTEGER, bar TEXT, cumbar TEXT, cumsum INTEGER, cumpct NUMERIC) AS $$
DECLARE
r histogram_result;
min_count integer := (select min(x.count) from unnest(h) as x);
max_count integer := (select max(x.count) from unnest(h) as x);
total_count integer := (select sum(x.count) from unnest(h) as x);
bar_max_width integer := 30;
bar_tick_size float8 := bar_max_width / (max_count - min_count)::float8;
bar text;
cumsum integer := 0;
cumpct numeric;
BEGIN
FOREACH r IN ARRAY h LOOP
IF r.bucket IS NULL THEN
CONTINUE;
END IF;
cumsum := cumsum + r.count;
cumpct := (cumsum::numeric / total_count);
bar := histobar(r.count, bar_tick_size);
RETURN QUERY VALUES (
r.bucket,
r.range,
r.count,
bar,
histobar(cumpct, bar_max_width),
cumsum,
cumpct
);
END loop;
END;
$$ LANGUAGE plpgsql;
@rayshan
Copy link

rayshan commented Sep 2, 2019

@wolever thanks for this. Would you consider supporting negative range values? E.g.

SELECT * FROM show_histogram((SELECT histogram(returns, -100, 100, 10) FROM data));

Results in

bucket	range
0	[0,20)
1	[20,40)
2	[40,60)
3	[60,80)
4	[80,100)
5	[100,120)
6	[120,140)
7	[140,160)
8	[160,180)
9	[180,200)

Expected: bucket 0 should be [-100, -80].

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment