Skip to content

Instantly share code, notes, and snippets.

@momer
Last active December 16, 2015 11:29
Show Gist options
  • Save momer/5428096 to your computer and use it in GitHub Desktop.
Save momer/5428096 to your computer and use it in GitHub Desktop.
Super Fast Random Postgres Queries
--
-- Person's unoptimized query
--
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Seq Scan on keyword_tracked_tweets (cost=0.06..484854.06 rows=50015 width=172) (actual time=16639.692..32543.957 rows=1 loops=1)
Filter: ((id)::double precision = trunc((random() * ($1)::double precision)))
InitPlan 2 (returns $1)
-> Result (cost=0.05..0.06 rows=1 width=0) (actual time=0.045..0.047 rows=1 loops=1)
InitPlan 1 (returns $0)
-> Limit (cost=0.00..0.05 rows=1 width=4) (actual time=0.034..0.037 rows=1 loops=1)
-> Index Scan Backward using keyword_tracked_tweets_pkey on keyword_tracked_tweets (cost=0.00..497474.41 rows=10002920 width=4) (actual time=0.028..0.028 rows=1 loops=1)
Index Cond: (id IS NOT NULL)
Total runtime: 32544.849 ms
(9 rows)
(END)
-- Generate a random id from the set of ints from 0 -> last ID in the table.
--Note - may include items that have been deleted.
CREATE OR REPLACE FUNCTION generate_random_id() RETURNS integer AS $$
DECLARE
myID integer;
BEGIN
SELECT trunc(random() * (select MAX(id) from keyword_tracked_tweets)) INTO myID;
RETURN myID;
END;
$$ LANGUAGE plpgsql;
-- Find that ID's row in the database
CREATE OR REPLACE FUNCTION find_random_id()
RETURNS SETOF keyword_tracked_tweets AS $BODY$
DECLARE
r keyword_tracked_tweets%rowtype;
BEGIN
FOR r IN SELECT * from keyword_tracked_tweets where id = (SELECT generate_random_id())
LOOP
RETURN NEXT r;
END LOOP;
RETURN;
END
$BODY$
LANGUAGE 'plpgsql' ;
-- RESULTS of running this 100 times on a table of >1 million rows = min: ~1ms max: ~72ms average: around 20ms
EXPLAIN ANALYZE SELECT * FROM find_random_id();
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
Function Scan on find_random_id (cost=0.25..10.25 rows=1000 width=116) (actual time=11.600..11.609 rows=1 loops=1)
Total runtime: 11.649 ms
(
# EXPLAIN ANALYZE SELECT * from keyword_tracked_tweets where id = 830851;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
Index Scan using keyword_tracked_tweets_pkey on keyword_tracked_tweets (cost=0.00..9.14 rows=1 width=172) (actual time=0.024..0.028 rows=1 loops=1)
Index Cond: (id = 830851)
Total runtime: 0.075 ms
(3 rows)
(END)
EXPLAIN ANALYZE SELECT * FROM keyword_tracked_tweets
WHERE id = (trunc(random() * (select MAX(id) from keyword_tracked_tweets))) limit 1;
# EXPLAIN ANALYZE SELECT trunc(random() * (select MAX(id) from keyword_tracked_tweets));
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Result (cost=0.06..0.08 rows=1 width=0) (actual time=0.096..0.099 rows=1 loops=1)
InitPlan 2 (returns $1)
-> Result (cost=0.05..0.06 rows=1 width=0) (actual time=0.071..0.073 rows=1 loops=1)
InitPlan 1 (returns $0)
-> Limit (cost=0.00..0.05 rows=1 width=4) (actual time=0.060..0.062 rows=1 loops=1)
-> Index Scan Backward using keyword_tracked_tweets_pkey on keyword_tracked_tweets (cost=0.00..497474.41 rows=10002920 width=4) (actual time=0.049..0.049 rows=1 loops=1)
Index Cond: (id IS NOT NULL)
Total runtime: 0.181 ms
(8 rows)
(END)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment