cabecada/postgres_composite_index

## postgres_composite_index
\d+ dates
                                                               Table "public.dates"
         Column         |  Type   | Collation | Nullable |              Default              | Storage  | Compression | Stats target | Description
------------------------+---------+-----------+----------+-----------------------------------+----------+-------------+--------------+-------------
 id                     | integer |           | not null | nextval('dates_id_seq'::regclass) | plain    |             |              |
 date                   | date    |           |          |                                   | plain    |             |              |
 epoch                  | bigint  |           |          |                                   | plain    |             |              |
 day_suffix             | text    |           |          |                                   | extended | pglz        |              |
 day_name               | text    |           |          |                                   | extended | pglz        |              |
 day_of_week            | integer |           |          |                                   | plain    |             |              |
 day_of_month           | integer |           |          |                                   | plain    |             |              |
 day_of_quarter         | integer |           |          |                                   | plain    |             |              |
 day_of_year            | integer |           |          |                                   | plain    |             |              |
 week_of_month          | integer |           |          |                                   | plain    |             |              |
 week_of_year           | integer |           |          |                                   | plain    |             |              |
 week_of_year_iso       | text    |           |          |                                   | extended | pglz        |              |
 month_actual           | integer |           |          |                                   | plain    |             |              |
 month_name             | text    |           |          |                                   | extended | pglz        |              |
 month_name_abbreviated | text    |           |          |                                   | extended | pglz        |              |
 quarter_actual         | integer |           |          |                                   | plain    |             |              |
 quarter_name           | text    |           |          |                                   | extended | pglz        |              |
 year_actual            | integer |           |          |                                   | plain    |             |              |
 year_iso               | integer |           |          |                                   | plain    |             |              |
 first_day_of_week      | date    |           |          |                                   | plain    |             |              |
 last_day_of_week       | date    |           |          |                                   | plain    |             |              |
 first_day_of_month     | date    |           |          |                                   | plain    |             |              |
 last_day_of_month      | date    |           |          |                                   | plain    |             |              |
 first_day_of_quarter   | date    |           |          |                                   | plain    |             |              |
 last_day_of_quarter    | date    |           |          |                                   | plain    |             |              |
 first_day_of_year      | date    |           |          |                                   | plain    |             |              |
 last_day_of_year       | date    |           |          |                                   | plain    |             |              |
 mmyyyy                 | text    |           |          |                                   | extended | pglz        |              |
 mmddyyyy               | text    |           |          |                                   | extended | pglz        |              |
 weekend_indr           | boolean |           |          |                                   | plain    |             |              |
Indexes:
    "dates_pkey" PRIMARY KEY, btree (id)
    "dates_day_name_quarter_name_idx" btree (day_name, quarter_name)
    "dates_id_day_name_quarter_name_idx" btree (id, day_name, quarter_name)
Triggers:
    insert_date_dimension_row BEFORE INSERT ON dates FOR EACH ROW EXECUTE FUNCTION insert_date_dimension_row()
Access method: heap

/*
epoch - distinct value for each row
day_name - 7 values distinct in 13k rows
quarter_name - 4 values distinct in 13k rows
id - distinct value for each row
*/

postgres=# select min(date), max(date), count(*) from dates;
    min     |    max     | count
------------+------------+-------
 2014-01-01 | 2050-01-01 | 13150
(1 row)


postgres=# select count(*), day_name from dates group by day_name;
 count | day_name
-------+-----------
  1879 | Friday
  1878 | Monday
  1879 | Saturday
  1878 | Sunday
  1879 | Thursday
  1878 | Tuesday
  1879 | Wednesday
(7 rows)

postgres=# select count(*), quarter_name from dates group by quarter_name;
 count | quarter_name
-------+--------------
  3312 | Fourth
  3276 | Second
  3250 | First
  3312 | Third
(4 rows)


-- since id is part of predicate and leading column of the index, index is used
postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday' and quarter_name = 'Second';
                                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_id_day_name_quarter_name_idx on dates  (cost=0.29..8.31 rows=1 width=150) (actual time=0.013..0.013 rows=0 loops=1)
   Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
 Planning Time: 0.088 ms
 Execution Time: 0.031 ms
(4 rows)

-- since no id, but only day_name and quarter_name, both which have less distinct values, seq is used.
postgres=# explain analyze select * from dates where day_name = 'Wednesday' and quarter_name = 'Second';
                                              QUERY PLAN
-------------------------------------------------------------------------------------------------------
 Seq Scan on dates  (cost=0.00..503.25 rows=468 width=150) (actual time=0.029..2.673 rows=468 loops=1)
   Filter: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
   Rows Removed by Filter: 12682
 Planning Time: 0.073 ms
 Execution Time: 2.707 ms
(5 rows)


postgres=# explain analyze select * from dates where id = 1 and quarter_name = 'Second';
                                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_id_day_name_quarter_name_idx on dates  (cost=0.29..8.30 rows=1 width=150) (actual time=0.011..0.012 rows=0 loops=1)
   Index Cond: ((id = 1) AND (quarter_name = 'Second'::text))
 Planning Time: 0.082 ms
 Execution Time: 0.029 ms
(4 rows)

postgres=# explain analyze select * from dates where quarter_name = 'Second';
                                               QUERY PLAN
---------------------------------------------------------------------------------------------------------
 Seq Scan on dates  (cost=0.00..470.38 rows=3276 width=150) (actual time=0.027..2.596 rows=3276 loops=1)
   Filter: (quarter_name = 'Second'::text)
   Rows Removed by Filter: 9874
 Planning Time: 0.065 ms
 Execution Time: 2.726 ms
(5 rows)

postgres=# set enable_seqscan TO off;
SET
postgres=# explain analyze select * from dates where quarter_name = 'Second';
                                                                    QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on dates  (cost=367.73..714.68 rows=3276 width=150) (actual time=1.064..1.584 rows=3276 loops=1)
   Recheck Cond: (quarter_name = 'Second'::text)
   Heap Blocks: exact=116
   ->  Bitmap Index Scan on dates_id_day_name_quarter_name_idx  (cost=0.00..366.91 rows=3276 width=0) (actual time=1.042..1.042 rows=3276 loops=1)
         Index Cond: (quarter_name = 'Second'::text)
 Planning Time: 0.066 ms
 Execution Time: 1.773 ms
(7 rows)

-- unwanted composite index with day_name and quarter_name, both having less distinct values
postgres=# create index on dates using btree(day_name, quarter_name);
CREATE INDEX
postgres=# vacuum ANALYZE dates;
VACUUM

-- since id is used, index scan
postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday' and quarter_name = 'Second';
                                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_id_day_name_quarter_name_idx on dates  (cost=0.29..8.31 rows=1 width=150) (actual time=0.011..0.011 rows=0 loops=1)
   Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
 Planning Time: 0.072 ms
 Execution Time: 0.024 ms
(4 rows)


-- since id is used, index scan
postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday';
                                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_id_day_name_quarter_name_idx on dates  (cost=0.29..8.30 rows=1 width=150) (actual time=0.013..0.014 rows=1 loops=1)
   Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text))
 Planning Time: 0.083 ms
 Execution Time: 0.031 ms
(4 rows)


-- since no id, but day_name and quarter_name, but rows estimated are 468 / 13k , bitmap scan on index as
-- a plain index scan would do more work
postgres=# explain analyze select * from dates where day_name = 'Wednesday' and quarter_name = 'Second';
                                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
 Bitmap Heap Scan on dates  (cost=9.08..336.08 rows=468 width=150) (actual time=0.044..0.190 rows=468 loops=1)
   Recheck Cond: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
   Heap Blocks: exact=105
   ->  Bitmap Index Scan on dates_day_name_quarter_name_idx  (cost=0.00..8.96 rows=468 width=0) (actual time=0.027..0.027 rows=468 loops=1)
         Index Cond: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
 Planning Time: 0.080 ms
 Execution Time: 0.226 ms
(7 rows)

postgres=# explain analyze select * from dates where quarter_name = 'Second';
                                               QUERY PLAN
---------------------------------------------------------------------------------------------------------
 Seq Scan on dates  (cost=0.00..470.38 rows=3276 width=150) (actual time=0.017..1.561 rows=3276 loops=1)
   Filter: (quarter_name = 'Second'::text)
   Rows Removed by Filter: 9874
 Planning Time: 0.047 ms
 Execution Time: 1.638 ms
(5 rows)


-- create one more index, now using epoch as trailing column, but distinct
postgres=# create index on dates using btree(day_name, quarter_name, epoch);
CREATE INDEX
postgres=# vacuum ANALYZE dates;
VACUUM

-- index with epoch used, even when trailing as it is distinct
postgres=# explain analyze select * from dates where epoch = 1388534400;
                                                                   QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_day_name_quarter_name_epoch_idx on dates  (cost=0.29..402.92 rows=1 width=150) (actual time=0.787..0.910 rows=1 loops=1)
   Index Cond: (epoch = 1388534400)
 Planning Time: 0.257 ms
 Execution Time: 0.926 ms
(4 rows)

-- same
postgres=# explain analyze select * from dates where quarter_name = 'Second' and epoch = 1388534400
postgres-# ;
                                                                   QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_day_name_quarter_name_epoch_idx on dates  (cost=0.29..435.80 rows=1 width=150) (actual time=0.642..0.642 rows=0 loops=1)
   Index Cond: ((quarter_name = 'Second'::text) AND (epoch = 1388534400))
 Planning Time: 0.083 ms
 Execution Time: 0.659 ms
(4 rows)


-- conflicting case, both id and epoch are unique, but size of index with id is less hence epoch index not used
postgres=# explain analyze select * from dates where id = 1 and epoch = 1388534400;
                                                                 QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_id_day_name_quarter_name_idx on dates  (cost=0.29..8.30 rows=1 width=150) (actual time=0.037..0.039 rows=1 loops=1)
   Index Cond: (id = 1)
   Filter: (epoch = 1388534400)
 Planning Time: 0.144 ms
 Execution Time: 0.075 ms
(5 rows)

postgres=# explain analyze select * from dates where day_name = 'Wednesday' and epoch = 1388534400;
                                                                   QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
 Index Scan using dates_day_name_quarter_name_epoch_idx on dates  (cost=0.29..67.08 rows=1 width=150) (actual time=0.013..0.073 rows=1 loops=1)
   Index Cond: ((day_name = 'Wednesday'::text) AND (epoch = 1388534400))
 Planning Time: 0.054 ms
 Execution Time: 0.084 ms
(4 rows)
	\d+ dates
	Table "public.dates"
	Column \| Type \| Collation \| Nullable \| Default \| Storage \| Compression \| Stats target \| Description
	------------------------+---------+-----------+----------+-----------------------------------+----------+-------------+--------------+-------------
	id \| integer \| \| not null \| nextval('dates_id_seq'::regclass) \| plain \| \| \|
	date \| date \| \| \| \| plain \| \| \|
	epoch \| bigint \| \| \| \| plain \| \| \|
	day_suffix \| text \| \| \| \| extended \| pglz \| \|
	day_name \| text \| \| \| \| extended \| pglz \| \|
	day_of_week \| integer \| \| \| \| plain \| \| \|
	day_of_month \| integer \| \| \| \| plain \| \| \|
	day_of_quarter \| integer \| \| \| \| plain \| \| \|
	day_of_year \| integer \| \| \| \| plain \| \| \|
	week_of_month \| integer \| \| \| \| plain \| \| \|
	week_of_year \| integer \| \| \| \| plain \| \| \|
	week_of_year_iso \| text \| \| \| \| extended \| pglz \| \|
	month_actual \| integer \| \| \| \| plain \| \| \|
	month_name \| text \| \| \| \| extended \| pglz \| \|
	month_name_abbreviated \| text \| \| \| \| extended \| pglz \| \|
	quarter_actual \| integer \| \| \| \| plain \| \| \|
	quarter_name \| text \| \| \| \| extended \| pglz \| \|
	year_actual \| integer \| \| \| \| plain \| \| \|
	year_iso \| integer \| \| \| \| plain \| \| \|
	first_day_of_week \| date \| \| \| \| plain \| \| \|
	last_day_of_week \| date \| \| \| \| plain \| \| \|
	first_day_of_month \| date \| \| \| \| plain \| \| \|
	last_day_of_month \| date \| \| \| \| plain \| \| \|
	first_day_of_quarter \| date \| \| \| \| plain \| \| \|
	last_day_of_quarter \| date \| \| \| \| plain \| \| \|
	first_day_of_year \| date \| \| \| \| plain \| \| \|
	last_day_of_year \| date \| \| \| \| plain \| \| \|
	mmyyyy \| text \| \| \| \| extended \| pglz \| \|
	mmddyyyy \| text \| \| \| \| extended \| pglz \| \|
	weekend_indr \| boolean \| \| \| \| plain \| \| \|
	Indexes:
	"dates_pkey" PRIMARY KEY, btree (id)
	"dates_day_name_quarter_name_idx" btree (day_name, quarter_name)
	"dates_id_day_name_quarter_name_idx" btree (id, day_name, quarter_name)
	Triggers:
	insert_date_dimension_row BEFORE INSERT ON dates FOR EACH ROW EXECUTE FUNCTION insert_date_dimension_row()
	Access method: heap

	/*
	epoch - distinct value for each row
	day_name - 7 values distinct in 13k rows
	quarter_name - 4 values distinct in 13k rows
	id - distinct value for each row
	*/

	postgres=# select min(date), max(date), count(*) from dates;
	min \| max \| count
	------------+------------+-------
	2014-01-01 \| 2050-01-01 \| 13150
	(1 row)


	postgres=# select count(*), day_name from dates group by day_name;
	count \| day_name
	-------+-----------
	1879 \| Friday
	1878 \| Monday
	1879 \| Saturday
	1878 \| Sunday
	1879 \| Thursday
	1878 \| Tuesday
	1879 \| Wednesday
	(7 rows)

	postgres=# select count(*), quarter_name from dates group by quarter_name;
	count \| quarter_name
	-------+--------------
	3312 \| Fourth
	3276 \| Second
	3250 \| First
	3312 \| Third
	(4 rows)


	-- since id is part of predicate and leading column of the index, index is used
	postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday' and quarter_name = 'Second';
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.31 rows=1 width=150) (actual time=0.013..0.013 rows=0 loops=1)
	Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
	Planning Time: 0.088 ms
	Execution Time: 0.031 ms
	(4 rows)

	-- since no id, but only day_name and quarter_name, both which have less distinct values, seq is used.
	postgres=# explain analyze select * from dates where day_name = 'Wednesday' and quarter_name = 'Second';
	QUERY PLAN
	-------------------------------------------------------------------------------------------------------
	Seq Scan on dates (cost=0.00..503.25 rows=468 width=150) (actual time=0.029..2.673 rows=468 loops=1)
	Filter: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
	Rows Removed by Filter: 12682
	Planning Time: 0.073 ms
	Execution Time: 2.707 ms
	(5 rows)


	postgres=# explain analyze select * from dates where id = 1 and quarter_name = 'Second';
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.30 rows=1 width=150) (actual time=0.011..0.012 rows=0 loops=1)
	Index Cond: ((id = 1) AND (quarter_name = 'Second'::text))
	Planning Time: 0.082 ms
	Execution Time: 0.029 ms
	(4 rows)

	postgres=# explain analyze select * from dates where quarter_name = 'Second';
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------
	Seq Scan on dates (cost=0.00..470.38 rows=3276 width=150) (actual time=0.027..2.596 rows=3276 loops=1)
	Filter: (quarter_name = 'Second'::text)
	Rows Removed by Filter: 9874
	Planning Time: 0.065 ms
	Execution Time: 2.726 ms
	(5 rows)

	postgres=# set enable_seqscan TO off;
	SET
	postgres=# explain analyze select * from dates where quarter_name = 'Second';
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------------------------------------------------
	Bitmap Heap Scan on dates (cost=367.73..714.68 rows=3276 width=150) (actual time=1.064..1.584 rows=3276 loops=1)
	Recheck Cond: (quarter_name = 'Second'::text)
	Heap Blocks: exact=116
	-> Bitmap Index Scan on dates_id_day_name_quarter_name_idx (cost=0.00..366.91 rows=3276 width=0) (actual time=1.042..1.042 rows=3276 loops=1)
	Index Cond: (quarter_name = 'Second'::text)
	Planning Time: 0.066 ms
	Execution Time: 1.773 ms
	(7 rows)

	-- unwanted composite index with day_name and quarter_name, both having less distinct values
	postgres=# create index on dates using btree(day_name, quarter_name);
	CREATE INDEX
	postgres=# vacuum ANALYZE dates;
	VACUUM

	-- since id is used, index scan
	postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday' and quarter_name = 'Second';
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.31 rows=1 width=150) (actual time=0.011..0.011 rows=0 loops=1)
	Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
	Planning Time: 0.072 ms
	Execution Time: 0.024 ms
	(4 rows)


	-- since id is used, index scan
	postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday';
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.30 rows=1 width=150) (actual time=0.013..0.014 rows=1 loops=1)
	Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text))
	Planning Time: 0.083 ms
	Execution Time: 0.031 ms
	(4 rows)


	-- since no id, but day_name and quarter_name, but rows estimated are 468 / 13k , bitmap scan on index as
	-- a plain index scan would do more work
	postgres=# explain analyze select * from dates where day_name = 'Wednesday' and quarter_name = 'Second';
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------------------------------------------
	Bitmap Heap Scan on dates (cost=9.08..336.08 rows=468 width=150) (actual time=0.044..0.190 rows=468 loops=1)
	Recheck Cond: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
	Heap Blocks: exact=105
	-> Bitmap Index Scan on dates_day_name_quarter_name_idx (cost=0.00..8.96 rows=468 width=0) (actual time=0.027..0.027 rows=468 loops=1)
	Index Cond: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text))
	Planning Time: 0.080 ms
	Execution Time: 0.226 ms
	(7 rows)

	postgres=# explain analyze select * from dates where quarter_name = 'Second';
	QUERY PLAN
	---------------------------------------------------------------------------------------------------------
	Seq Scan on dates (cost=0.00..470.38 rows=3276 width=150) (actual time=0.017..1.561 rows=3276 loops=1)
	Filter: (quarter_name = 'Second'::text)
	Rows Removed by Filter: 9874
	Planning Time: 0.047 ms
	Execution Time: 1.638 ms
	(5 rows)


	-- create one more index, now using epoch as trailing column, but distinct
	postgres=# create index on dates using btree(day_name, quarter_name, epoch);
	CREATE INDEX
	postgres=# vacuum ANALYZE dates;
	VACUUM

	-- index with epoch used, even when trailing as it is distinct
	postgres=# explain analyze select * from dates where epoch = 1388534400;
	QUERY PLAN
	-------------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_day_name_quarter_name_epoch_idx on dates (cost=0.29..402.92 rows=1 width=150) (actual time=0.787..0.910 rows=1 loops=1)
	Index Cond: (epoch = 1388534400)
	Planning Time: 0.257 ms
	Execution Time: 0.926 ms
	(4 rows)

	-- same
	postgres=# explain analyze select * from dates where quarter_name = 'Second' and epoch = 1388534400
	postgres-# ;
	QUERY PLAN
	-------------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_day_name_quarter_name_epoch_idx on dates (cost=0.29..435.80 rows=1 width=150) (actual time=0.642..0.642 rows=0 loops=1)
	Index Cond: ((quarter_name = 'Second'::text) AND (epoch = 1388534400))
	Planning Time: 0.083 ms
	Execution Time: 0.659 ms
	(4 rows)


	-- conflicting case, both id and epoch are unique, but size of index with id is less hence epoch index not used
	postgres=# explain analyze select * from dates where id = 1 and epoch = 1388534400;
	QUERY PLAN
	--------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.30 rows=1 width=150) (actual time=0.037..0.039 rows=1 loops=1)
	Index Cond: (id = 1)
	Filter: (epoch = 1388534400)
	Planning Time: 0.144 ms
	Execution Time: 0.075 ms
	(5 rows)

	postgres=# explain analyze select * from dates where day_name = 'Wednesday' and epoch = 1388534400;
	QUERY PLAN
	------------------------------------------------------------------------------------------------------------------------------------------------
	Index Scan using dates_day_name_quarter_name_epoch_idx on dates (cost=0.29..67.08 rows=1 width=150) (actual time=0.013..0.073 rows=1 loops=1)
	Index Cond: ((day_name = 'Wednesday'::text) AND (epoch = 1388534400))
	Planning Time: 0.054 ms
	Execution Time: 0.084 ms
	(4 rows)