Created
July 18, 2021 20:10
-
-
Save cabecada/0f206ca51dafbb210b259f08d4609ae0 to your computer and use it in GitHub Desktop.
postgres_composite_index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\d+ dates | |
Table "public.dates" | |
Column | Type | Collation | Nullable | Default | Storage | Compression | Stats target | Description | |
------------------------+---------+-----------+----------+-----------------------------------+----------+-------------+--------------+------------- | |
id | integer | | not null | nextval('dates_id_seq'::regclass) | plain | | | | |
date | date | | | | plain | | | | |
epoch | bigint | | | | plain | | | | |
day_suffix | text | | | | extended | pglz | | | |
day_name | text | | | | extended | pglz | | | |
day_of_week | integer | | | | plain | | | | |
day_of_month | integer | | | | plain | | | | |
day_of_quarter | integer | | | | plain | | | | |
day_of_year | integer | | | | plain | | | | |
week_of_month | integer | | | | plain | | | | |
week_of_year | integer | | | | plain | | | | |
week_of_year_iso | text | | | | extended | pglz | | | |
month_actual | integer | | | | plain | | | | |
month_name | text | | | | extended | pglz | | | |
month_name_abbreviated | text | | | | extended | pglz | | | |
quarter_actual | integer | | | | plain | | | | |
quarter_name | text | | | | extended | pglz | | | |
year_actual | integer | | | | plain | | | | |
year_iso | integer | | | | plain | | | | |
first_day_of_week | date | | | | plain | | | | |
last_day_of_week | date | | | | plain | | | | |
first_day_of_month | date | | | | plain | | | | |
last_day_of_month | date | | | | plain | | | | |
first_day_of_quarter | date | | | | plain | | | | |
last_day_of_quarter | date | | | | plain | | | | |
first_day_of_year | date | | | | plain | | | | |
last_day_of_year | date | | | | plain | | | | |
mmyyyy | text | | | | extended | pglz | | | |
mmddyyyy | text | | | | extended | pglz | | | |
weekend_indr | boolean | | | | plain | | | | |
Indexes: | |
"dates_pkey" PRIMARY KEY, btree (id) | |
"dates_day_name_quarter_name_idx" btree (day_name, quarter_name) | |
"dates_id_day_name_quarter_name_idx" btree (id, day_name, quarter_name) | |
Triggers: | |
insert_date_dimension_row BEFORE INSERT ON dates FOR EACH ROW EXECUTE FUNCTION insert_date_dimension_row() | |
Access method: heap | |
/* | |
epoch - distinct value for each row | |
day_name - 7 values distinct in 13k rows | |
quarter_name - 4 values distinct in 13k rows | |
id - distinct value for each row | |
*/ | |
postgres=# select min(date), max(date), count(*) from dates; | |
min | max | count | |
------------+------------+------- | |
2014-01-01 | 2050-01-01 | 13150 | |
(1 row) | |
postgres=# select count(*), day_name from dates group by day_name; | |
count | day_name | |
-------+----------- | |
1879 | Friday | |
1878 | Monday | |
1879 | Saturday | |
1878 | Sunday | |
1879 | Thursday | |
1878 | Tuesday | |
1879 | Wednesday | |
(7 rows) | |
postgres=# select count(*), quarter_name from dates group by quarter_name; | |
count | quarter_name | |
-------+-------------- | |
3312 | Fourth | |
3276 | Second | |
3250 | First | |
3312 | Third | |
(4 rows) | |
-- since id is part of predicate and leading column of the index, index is used | |
postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday' and quarter_name = 'Second'; | |
QUERY PLAN | |
-------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.31 rows=1 width=150) (actual time=0.013..0.013 rows=0 loops=1) | |
Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text)) | |
Planning Time: 0.088 ms | |
Execution Time: 0.031 ms | |
(4 rows) | |
-- since no id, but only day_name and quarter_name, both which have less distinct values, seq is used. | |
postgres=# explain analyze select * from dates where day_name = 'Wednesday' and quarter_name = 'Second'; | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------- | |
Seq Scan on dates (cost=0.00..503.25 rows=468 width=150) (actual time=0.029..2.673 rows=468 loops=1) | |
Filter: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text)) | |
Rows Removed by Filter: 12682 | |
Planning Time: 0.073 ms | |
Execution Time: 2.707 ms | |
(5 rows) | |
postgres=# explain analyze select * from dates where id = 1 and quarter_name = 'Second'; | |
QUERY PLAN | |
-------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.30 rows=1 width=150) (actual time=0.011..0.012 rows=0 loops=1) | |
Index Cond: ((id = 1) AND (quarter_name = 'Second'::text)) | |
Planning Time: 0.082 ms | |
Execution Time: 0.029 ms | |
(4 rows) | |
postgres=# explain analyze select * from dates where quarter_name = 'Second'; | |
QUERY PLAN | |
--------------------------------------------------------------------------------------------------------- | |
Seq Scan on dates (cost=0.00..470.38 rows=3276 width=150) (actual time=0.027..2.596 rows=3276 loops=1) | |
Filter: (quarter_name = 'Second'::text) | |
Rows Removed by Filter: 9874 | |
Planning Time: 0.065 ms | |
Execution Time: 2.726 ms | |
(5 rows) | |
postgres=# set enable_seqscan TO off; | |
SET | |
postgres=# explain analyze select * from dates where quarter_name = 'Second'; | |
QUERY PLAN | |
--------------------------------------------------------------------------------------------------------------------------------------------------- | |
Bitmap Heap Scan on dates (cost=367.73..714.68 rows=3276 width=150) (actual time=1.064..1.584 rows=3276 loops=1) | |
Recheck Cond: (quarter_name = 'Second'::text) | |
Heap Blocks: exact=116 | |
-> Bitmap Index Scan on dates_id_day_name_quarter_name_idx (cost=0.00..366.91 rows=3276 width=0) (actual time=1.042..1.042 rows=3276 loops=1) | |
Index Cond: (quarter_name = 'Second'::text) | |
Planning Time: 0.066 ms | |
Execution Time: 1.773 ms | |
(7 rows) | |
-- unwanted composite index with day_name and quarter_name, both having less distinct values | |
postgres=# create index on dates using btree(day_name, quarter_name); | |
CREATE INDEX | |
postgres=# vacuum ANALYZE dates; | |
VACUUM | |
-- since id is used, index scan | |
postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday' and quarter_name = 'Second'; | |
QUERY PLAN | |
-------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.31 rows=1 width=150) (actual time=0.011..0.011 rows=0 loops=1) | |
Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text)) | |
Planning Time: 0.072 ms | |
Execution Time: 0.024 ms | |
(4 rows) | |
-- since id is used, index scan | |
postgres=# explain analyze select * from dates where id = 1 and day_name = 'Wednesday'; | |
QUERY PLAN | |
-------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.30 rows=1 width=150) (actual time=0.013..0.014 rows=1 loops=1) | |
Index Cond: ((id = 1) AND (day_name = 'Wednesday'::text)) | |
Planning Time: 0.083 ms | |
Execution Time: 0.031 ms | |
(4 rows) | |
-- since no id, but day_name and quarter_name, but rows estimated are 468 / 13k , bitmap scan on index as | |
-- a plain index scan would do more work | |
postgres=# explain analyze select * from dates where day_name = 'Wednesday' and quarter_name = 'Second'; | |
QUERY PLAN | |
-------------------------------------------------------------------------------------------------------------------------------------------- | |
Bitmap Heap Scan on dates (cost=9.08..336.08 rows=468 width=150) (actual time=0.044..0.190 rows=468 loops=1) | |
Recheck Cond: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text)) | |
Heap Blocks: exact=105 | |
-> Bitmap Index Scan on dates_day_name_quarter_name_idx (cost=0.00..8.96 rows=468 width=0) (actual time=0.027..0.027 rows=468 loops=1) | |
Index Cond: ((day_name = 'Wednesday'::text) AND (quarter_name = 'Second'::text)) | |
Planning Time: 0.080 ms | |
Execution Time: 0.226 ms | |
(7 rows) | |
postgres=# explain analyze select * from dates where quarter_name = 'Second'; | |
QUERY PLAN | |
--------------------------------------------------------------------------------------------------------- | |
Seq Scan on dates (cost=0.00..470.38 rows=3276 width=150) (actual time=0.017..1.561 rows=3276 loops=1) | |
Filter: (quarter_name = 'Second'::text) | |
Rows Removed by Filter: 9874 | |
Planning Time: 0.047 ms | |
Execution Time: 1.638 ms | |
(5 rows) | |
-- create one more index, now using epoch as trailing column, but distinct | |
postgres=# create index on dates using btree(day_name, quarter_name, epoch); | |
CREATE INDEX | |
postgres=# vacuum ANALYZE dates; | |
VACUUM | |
-- index with epoch used, even when trailing as it is distinct | |
postgres=# explain analyze select * from dates where epoch = 1388534400; | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_day_name_quarter_name_epoch_idx on dates (cost=0.29..402.92 rows=1 width=150) (actual time=0.787..0.910 rows=1 loops=1) | |
Index Cond: (epoch = 1388534400) | |
Planning Time: 0.257 ms | |
Execution Time: 0.926 ms | |
(4 rows) | |
-- same | |
postgres=# explain analyze select * from dates where quarter_name = 'Second' and epoch = 1388534400 | |
postgres-# ; | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_day_name_quarter_name_epoch_idx on dates (cost=0.29..435.80 rows=1 width=150) (actual time=0.642..0.642 rows=0 loops=1) | |
Index Cond: ((quarter_name = 'Second'::text) AND (epoch = 1388534400)) | |
Planning Time: 0.083 ms | |
Execution Time: 0.659 ms | |
(4 rows) | |
-- conflicting case, both id and epoch are unique, but size of index with id is less hence epoch index not used | |
postgres=# explain analyze select * from dates where id = 1 and epoch = 1388534400; | |
QUERY PLAN | |
-------------------------------------------------------------------------------------------------------------------------------------------- | |
Index Scan using dates_id_day_name_quarter_name_idx on dates (cost=0.29..8.30 rows=1 width=150) (actual time=0.037..0.039 rows=1 loops=1) | |
Index Cond: (id = 1) | |
Filter: (epoch = 1388534400) | |
Planning Time: 0.144 ms | |
Execution Time: 0.075 ms | |
(5 rows) | |
postgres=# explain analyze select * from dates where day_name = 'Wednesday' and epoch = 1388534400; | |
QUERY PLAN | |
------------------------------------------------------------------------------------------------------------------------------------------------ | |
Index Scan using dates_day_name_quarter_name_epoch_idx on dates (cost=0.29..67.08 rows=1 width=150) (actual time=0.013..0.073 rows=1 loops=1) | |
Index Cond: ((day_name = 'Wednesday'::text) AND (epoch = 1388534400)) | |
Planning Time: 0.054 ms | |
Execution Time: 0.084 ms | |
(4 rows) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment