Skip to content

Instantly share code, notes, and snippets.

# matheusoliveira/random.sql Created Feb 21, 2015

A very optimized way of getting random rows of a table on PostgreSQL (gets by "id" in a fast and non-biased way, if with gaps)
 /* One random row from table "tbl" */ WITH RECURSIVE r AS ( SELECT NULL::int AS id, min(id) AS min_id, max(id) AS max_id, 0 AS cnt FROM tbl UNION ALL SELECT tbl.id, r.min_id, r.max_id, r.cnt + 1 FROM r LEFT JOIN tbl ON tbl.id = (SELECT floor(random() * (r.max_id - r.min_id + 1))::int) WHERE r.id IS NULL ) SELECT r.id, r.cnt FROM r WHERE r.id IS NOT NULL; /* 10 (easily adjustable) random rows from table "tbl" (doesn't handle duplication, a simple exercise for the reader [tip: arrays and ANY operator]) */ WITH RECURSIVE r AS ( SELECT NULL::int AS id, min(id) AS min_id, max(id) AS max_id, 0 AS cnt, 0 AS taken FROM tbl2 UNION ALL SELECT tbl2.id, r.min_id, r.max_id, r.cnt + 1, r.taken + CASE WHEN tbl2.id IS NULL THEN 0 ELSE 1 END FROM r LEFT JOIN tbl2 ON tbl2.id = (SELECT floor(random() * (r.max_id - r.min_id + 1))::int) WHERE r.taken < 10 ) SELECT r.id, r.cnt FROM r WHERE r.id IS NOT NULL; /* OBS: On both "cnt" result is just to show how many iterations it took to return */
 CREATE TABLE tbl AS SELECT id FROM generate_series(1, 10000000, 50 /* some gaps */) id; CREATE INDEX ON tbl (id); VACUUM ANALYZE tbl;
to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.