momer/explain_analyze.sql

## explain_analyze.sql
--
-- Person's unoptimized query
--

                                                                                          QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Seq Scan on keyword_tracked_tweets  (cost=0.06..484854.06 rows=50015 width=172) (actual time=16639.692..32543.957 rows=1 loops=1)
   Filter: ((id)::double precision = trunc((random() * ($1)::double precision)))
   InitPlan 2 (returns $1)
     ->  Result  (cost=0.05..0.06 rows=1 width=0) (actual time=0.045..0.047 rows=1 loops=1)
           InitPlan 1 (returns $0)
             ->  Limit  (cost=0.00..0.05 rows=1 width=4) (actual time=0.034..0.037 rows=1 loops=1)
                   ->  Index Scan Backward using keyword_tracked_tweets_pkey on keyword_tracked_tweets  (cost=0.00..497474.41 rows=10002920 width=4) (actual time=0.028..0.028 rows=1 loops=1)
                         Index Cond: (id IS NOT NULL)
 Total runtime: 32544.849 ms
(9 rows)

(END)


## final_working_functions.sql
-- Generate a random id from the set of ints from 0 -> last ID in the table.
--Note - may include items that have been deleted.
CREATE OR REPLACE FUNCTION generate_random_id() RETURNS integer AS $$
DECLARE
  myID integer;
BEGIN
  SELECT trunc(random() * (select MAX(id) from keyword_tracked_tweets)) INTO myID;
  RETURN myID;
END;
$$ LANGUAGE plpgsql;

-- Find that ID's row in the database
CREATE OR REPLACE FUNCTION find_random_id()
RETURNS SETOF keyword_tracked_tweets AS $BODY$
DECLARE
  r keyword_tracked_tweets%rowtype;
BEGIN
  FOR r IN SELECT * from keyword_tracked_tweets where id = (SELECT generate_random_id())
    LOOP
      RETURN NEXT r;
    END LOOP;
  RETURN;
END
$BODY$
LANGUAGE 'plpgsql' ;

-- RESULTS of running this 100 times on a table of >1 million rows = min: ~1ms max: ~72ms average: around 20ms
EXPLAIN ANALYZE SELECT * FROM find_random_id();
                                                     QUERY PLAN
---------------------------------------------------------------------------------------------------------------------
 Function Scan on find_random_id  (cost=0.25..10.25 rows=1000 width=116) (actual time=11.600..11.609 rows=1 loops=1)
 Total runtime: 11.649 ms
(

## gistfile1.txt
# EXPLAIN ANALYZE SELECT * from keyword_tracked_tweets where id = 830851;
                                                                      QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using keyword_tracked_tweets_pkey on keyword_tracked_tweets  (cost=0.00..9.14 rows=1 width=172) (actual time=0.024..0.028 rows=1 loops=1)
   Index Cond: (id = 830851)
 Total runtime: 0.075 ms
(3 rows)

(END)


## sql.sql
EXPLAIN ANALYZE SELECT * FROM keyword_tracked_tweets
WHERE id = (trunc(random() * (select MAX(id) from keyword_tracked_tweets))) limit 1;

## trunc.sql
# EXPLAIN ANALYZE SELECT trunc(random() * (select MAX(id) from keyword_tracked_tweets));

-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Result  (cost=0.06..0.08 rows=1 width=0) (actual time=0.096..0.099 rows=1 loops=1)
   InitPlan 2 (returns $1)
     ->  Result  (cost=0.05..0.06 rows=1 width=0) (actual time=0.071..0.073 rows=1 loops=1)
           InitPlan 1 (returns $0)
             ->  Limit  (cost=0.00..0.05 rows=1 width=4) (actual time=0.060..0.062 rows=1 loops=1)
                   ->  Index Scan Backward using keyword_tracked_tweets_pkey on keyword_tracked_tweets  (cost=0.00..497474.41 rows=10002920 width=4) (actual time=0.049..0.049 rows=1 loops=1)
                         Index Cond: (id IS NOT NULL)
 Total runtime: 0.181 ms
(8 rows)

(END)
	--
	-- Person's unoptimized query
	--

	QUERY PLAN
	-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	Seq Scan on keyword_tracked_tweets (cost=0.06..484854.06 rows=50015 width=172) (actual time=16639.692..32543.957 rows=1 loops=1)
	Filter: ((id)::double precision = trunc((random() * ($1)::double precision)))
	InitPlan 2 (returns $1)
	-> Result (cost=0.05..0.06 rows=1 width=0) (actual time=0.045..0.047 rows=1 loops=1)
	InitPlan 1 (returns $0)
	-> Limit (cost=0.00..0.05 rows=1 width=4) (actual time=0.034..0.037 rows=1 loops=1)
	-> Index Scan Backward using keyword_tracked_tweets_pkey on keyword_tracked_tweets (cost=0.00..497474.41 rows=10002920 width=4) (actual time=0.028..0.028 rows=1 loops=1)
	Index Cond: (id IS NOT NULL)
	Total runtime: 32544.849 ms
	(9 rows)

	(END)
	-- Generate a random id from the set of ints from 0 -> last ID in the table.
	--Note - may include items that have been deleted.
	CREATE OR REPLACE FUNCTION generate_random_id() RETURNS integer AS $$
	DECLARE
	myID integer;
	BEGIN
	SELECT trunc(random() * (select MAX(id) from keyword_tracked_tweets)) INTO myID;
	RETURN myID;
	END;
	$$ LANGUAGE plpgsql;

	-- Find that ID's row in the database
	CREATE OR REPLACE FUNCTION find_random_id()
	RETURNS SETOF keyword_tracked_tweets AS $BODY$
	DECLARE
	r keyword_tracked_tweets%rowtype;
	BEGIN
	FOR r IN SELECT * from keyword_tracked_tweets where id = (SELECT generate_random_id())
	LOOP
	RETURN NEXT r;
	END LOOP;
	RETURN;
	END
	$BODY$
	LANGUAGE 'plpgsql' ;

	-- RESULTS of running this 100 times on a table of >1 million rows = min: ~1ms max: ~72ms average: around 20ms
	EXPLAIN ANALYZE SELECT * FROM find_random_id();
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------------------
	Function Scan on find_random_id (cost=0.25..10.25 rows=1000 width=116) (actual time=11.600..11.609 rows=1 loops=1)
	Total runtime: 11.649 ms
	(
	# EXPLAIN ANALYZE SELECT * from keyword_tracked_tweets where id = 830851;
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using keyword_tracked_tweets_pkey on keyword_tracked_tweets (cost=0.00..9.14 rows=1 width=172) (actual time=0.024..0.028 rows=1 loops=1)
	Index Cond: (id = 830851)
	Total runtime: 0.075 ms
	(3 rows)

	(END)
	EXPLAIN ANALYZE SELECT * FROM keyword_tracked_tweets
	WHERE id = (trunc(random() * (select MAX(id) from keyword_tracked_tweets))) limit 1;
	# EXPLAIN ANALYZE SELECT trunc(random() * (select MAX(id) from keyword_tracked_tweets));

	-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
	Result (cost=0.06..0.08 rows=1 width=0) (actual time=0.096..0.099 rows=1 loops=1)
	InitPlan 2 (returns $1)
	-> Result (cost=0.05..0.06 rows=1 width=0) (actual time=0.071..0.073 rows=1 loops=1)
	InitPlan 1 (returns $0)
	-> Limit (cost=0.00..0.05 rows=1 width=4) (actual time=0.060..0.062 rows=1 loops=1)
	-> Index Scan Backward using keyword_tracked_tweets_pkey on keyword_tracked_tweets (cost=0.00..497474.41 rows=10002920 width=4) (actual time=0.049..0.049 rows=1 loops=1)
	Index Cond: (id IS NOT NULL)
	Total runtime: 0.181 ms
	(8 rows)

	(END)