felixge/context.md

## context.md

      
    Raw
  

              context.md
            
          
    See my HN comment on the post The 3-minute SQL indexing quiz: Can you spot the five most common mistakes? . Do you agree that my answer ("Not enough information: Definite answer cannot be given") is correct? See proof in psql.txt below.
Question 5

This question is different. First consider the following index and query:
CREATE INDEX tbl_idx ON tbl (a, date_column)
SELECT date_column, count(*)
  FROM tbl
 WHERE a = 38
 GROUP BY date_column
Let's say this query returns at least a few rows.
To implement a new functional requirement, another condition (b = 1) is added to the where clause:
SELECT date_column, count(*)
  FROM tbl
 WHERE a = 38
   AND b = 1
 GROUP BY date_column
How will the change affect performance:

Same: Query performance stays about the same
Not enough information: Definite answer cannot be given (my answer)
Slower: Query takes more time (supposedly correct answer)
Faster: Query take less time

See:


## psql.txt
psql (9.6.3)
Type "help" for help.

felixge=# CREATE TABLE tbl (
felixge(#   a int,
felixge(#   b int,
felixge(#   date_column date
felixge(# );
CREATE TABLE
felixge=# CREATE INDEX tbl_idx ON tbl (a, date_column);
CREATE INDEX
felixge=#
felixge=# INSERT INTO tbl
felixge-# SELECT 38, 2, '2017-01-01'::date+i
felixge-# FROM generate_series(1, 10000) i;
INSERT 0 10000
felixge=#
felixge=# VACUUM ANALYZE tbl;
VACUUM
felixge=#
felixge=# EXPLAIN ANALYZE
felixge-# SELECT date_column, count(*)
felixge-# FROM tbl
felixge-# WHERE a = 38
felixge-# GROUP BY date_column;
                                                 QUERY PLAN
-------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=230.00..330.00 rows=10000 width=12) (actual time=7.326..10.491 rows=10000 loops=1)
   Group Key: date_column
   ->  Seq Scan on tbl  (cost=0.00..180.00 rows=10000 width=4) (actual time=0.009..2.396 rows=10000 loops=1)
         Filter: (a = 38)
 Planning time: 0.255 ms
 Execution time: 11.069 ms
(6 rows)

felixge=#
felixge=#
felixge=# EXPLAIN ANALYZE
felixge-# SELECT date_column, count(*)
felixge-# FROM tbl
felixge-# WHERE a = 38
felixge-# AND b = 1
felixge-# GROUP BY date_column;
                                                QUERY PLAN
-----------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=205.01..205.03 rows=1 width=12) (actual time=2.669..2.669 rows=0 loops=1)
   Group Key: date_column
   ->  Sort  (cost=205.01..205.01 rows=1 width=4) (actual time=2.668..2.668 rows=0 loops=1)
         Sort Key: date_column
         Sort Method: quicksort  Memory: 25kB
         ->  Seq Scan on tbl  (cost=0.00..205.00 rows=1 width=4) (actual time=2.662..2.662 rows=0 loops=1)
               Filter: ((a = 38) AND (b = 1))
               Rows Removed by Filter: 10000
 Planning time: 0.163 ms
 Execution time: 2.707 ms
(10 rows)

## psql2.txt
This is another example where the index-only scan actually kicks in for query 1, but
is still outperformed by a seq scan in query 2.

psql (9.6.3)
Type "help" for help.

felixge=# CREATE TABLE tbl (
felixge(#   a int,
felixge(#   b int,
felixge(#   date_column date
felixge(# );
CREATE TABLE
felixge=# CREATE INDEX tbl_idx ON tbl (a, date_column);
CREATE INDEX
felixge=#
felixge=# INSERT INTO tbl
felixge-# SELECT 38, 1, '2017-01-01'::date+i
felixge-# FROM generate_series(1, 10) i;
INSERT 0 10
felixge=# INSERT INTO tbl
felixge-# SELECT 38, 2, '2017-01-01'::date+i
felixge-# FROM generate_series(1, 10000) i;
INSERT 0 10000
felixge=# INSERT INTO tbl
felixge-# SELECT 39, 2, '2017-01-01'::date+i
felixge-# FROM generate_series(1, 10000) i;
INSERT 0 10000
felixge=#
felixge=#
felixge=# VACUUM ANALYZE tbl;
VACUUM
felixge=#
felixge=# EXPLAIN ANALYZE
felixge-# SELECT date_column, count(*)
felixge-# FROM tbl
felixge-# WHERE a = 38
felixge-# GROUP BY date_column;
                                                            QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=0.29..416.55 rows=7504 width=12) (actual time=0.042..7.761 rows=10000 loops=1)
   Group Key: date_column
   ->  Index Only Scan using tbl_idx on tbl  (cost=0.29..291.46 rows=10010 width=4) (actual time=0.030..2.144 rows=10010 loops=1)
         Index Cond: (a = 38)
         Heap Fetches: 0
 Planning time: 0.312 ms
 Execution time: 8.293 ms
(7 rows)

felixge=#
felixge=#
felixge=# EXPLAIN ANALYZE
felixge-# SELECT date_column, count(*)
felixge-# FROM tbl
felixge-# WHERE a = 38
felixge-# AND b = 1
felixge-# GROUP BY date_column;
                                                 QUERY PLAN
------------------------------------------------------------------------------------------------------------
 GroupAggregate  (cost=409.21..409.30 rows=5 width=12) (actual time=4.291..4.301 rows=10 loops=1)
   Group Key: date_column
   ->  Sort  (cost=409.21..409.22 rows=5 width=4) (actual time=4.286..4.286 rows=10 loops=1)
         Sort Key: date_column
         Sort Method: quicksort  Memory: 25kB
         ->  Seq Scan on tbl  (cost=0.00..409.15 rows=5 width=4) (actual time=0.012..4.264 rows=10 loops=1)
               Filter: ((a = 38) AND (b = 1))
               Rows Removed by Filter: 20000
 Planning time: 0.172 ms
 Execution time: 4.338 ms
(10 rows)
	psql (9.6.3)
	Type "help" for help.

	felixge=# CREATE TABLE tbl (
	felixge(# a int,
	felixge(# b int,
	felixge(# date_column date
	felixge(# );
	CREATE TABLE
	felixge=# CREATE INDEX tbl_idx ON tbl (a, date_column);
	CREATE INDEX
	felixge=#
	felixge=# INSERT INTO tbl
	felixge-# SELECT 38, 2, '2017-01-01'::date+i
	felixge-# FROM generate_series(1, 10000) i;
	INSERT 0 10000
	felixge=#
	felixge=# VACUUM ANALYZE tbl;
	VACUUM
	felixge=#
	felixge=# EXPLAIN ANALYZE
	felixge-# SELECT date_column, count(*)
	felixge-# FROM tbl
	felixge-# WHERE a = 38
	felixge-# GROUP BY date_column;
	QUERY PLAN
	-------------------------------------------------------------------------------------------------------------
	HashAggregate (cost=230.00..330.00 rows=10000 width=12) (actual time=7.326..10.491 rows=10000 loops=1)
	Group Key: date_column
	-> Seq Scan on tbl (cost=0.00..180.00 rows=10000 width=4) (actual time=0.009..2.396 rows=10000 loops=1)
	Filter: (a = 38)
	Planning time: 0.255 ms
	Execution time: 11.069 ms
	(6 rows)

	felixge=#
	felixge=#
	felixge=# EXPLAIN ANALYZE
	felixge-# SELECT date_column, count(*)
	felixge-# FROM tbl
	felixge-# WHERE a = 38
	felixge-# AND b = 1
	felixge-# GROUP BY date_column;
	QUERY PLAN
	-----------------------------------------------------------------------------------------------------------
	GroupAggregate (cost=205.01..205.03 rows=1 width=12) (actual time=2.669..2.669 rows=0 loops=1)
	Group Key: date_column
	-> Sort (cost=205.01..205.01 rows=1 width=4) (actual time=2.668..2.668 rows=0 loops=1)
	Sort Key: date_column
	Sort Method: quicksort Memory: 25kB
	-> Seq Scan on tbl (cost=0.00..205.00 rows=1 width=4) (actual time=2.662..2.662 rows=0 loops=1)
	Filter: ((a = 38) AND (b = 1))
	Rows Removed by Filter: 10000
	Planning time: 0.163 ms
	Execution time: 2.707 ms
	(10 rows)
	This is another example where the index-only scan actually kicks in for query 1, but
	is still outperformed by a seq scan in query 2.

	psql (9.6.3)
	Type "help" for help.

	felixge=# CREATE TABLE tbl (
	felixge(# a int,
	felixge(# b int,
	felixge(# date_column date
	felixge(# );
	CREATE TABLE
	felixge=# CREATE INDEX tbl_idx ON tbl (a, date_column);
	CREATE INDEX
	felixge=#
	felixge=# INSERT INTO tbl
	felixge-# SELECT 38, 1, '2017-01-01'::date+i
	felixge-# FROM generate_series(1, 10) i;
	INSERT 0 10
	felixge=# INSERT INTO tbl
	felixge-# SELECT 38, 2, '2017-01-01'::date+i
	felixge-# FROM generate_series(1, 10000) i;
	INSERT 0 10000
	felixge=# INSERT INTO tbl
	felixge-# SELECT 39, 2, '2017-01-01'::date+i
	felixge-# FROM generate_series(1, 10000) i;
	INSERT 0 10000
	felixge=#
	felixge=#
	felixge=# VACUUM ANALYZE tbl;
	VACUUM
	felixge=#
	felixge=# EXPLAIN ANALYZE
	felixge-# SELECT date_column, count(*)
	felixge-# FROM tbl
	felixge-# WHERE a = 38
	felixge-# GROUP BY date_column;
	QUERY PLAN
	----------------------------------------------------------------------------------------------------------------------------------
	GroupAggregate (cost=0.29..416.55 rows=7504 width=12) (actual time=0.042..7.761 rows=10000 loops=1)
	Group Key: date_column
	-> Index Only Scan using tbl_idx on tbl (cost=0.29..291.46 rows=10010 width=4) (actual time=0.030..2.144 rows=10010 loops=1)
	Index Cond: (a = 38)
	Heap Fetches: 0
	Planning time: 0.312 ms
	Execution time: 8.293 ms
	(7 rows)

	felixge=#
	felixge=#
	felixge=# EXPLAIN ANALYZE
	felixge-# SELECT date_column, count(*)
	felixge-# FROM tbl
	felixge-# WHERE a = 38
	felixge-# AND b = 1
	felixge-# GROUP BY date_column;
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------
	GroupAggregate (cost=409.21..409.30 rows=5 width=12) (actual time=4.291..4.301 rows=10 loops=1)
	Group Key: date_column
	-> Sort (cost=409.21..409.22 rows=5 width=4) (actual time=4.286..4.286 rows=10 loops=1)
	Sort Key: date_column
	Sort Method: quicksort Memory: 25kB
	-> Seq Scan on tbl (cost=0.00..409.15 rows=5 width=4) (actual time=0.012..4.264 rows=10 loops=1)
	Filter: ((a = 38) AND (b = 1))
	Rows Removed by Filter: 20000
	Planning time: 0.172 ms
	Execution time: 4.338 ms
	(10 rows)