Skip to content

Instantly share code, notes, and snippets.

@benoittgt
Last active March 6, 2024 13:10
Show Gist options
  • Save benoittgt/ab72dc4cfedea2a0c6a5ee809d16e04d to your computer and use it in GitHub Desktop.
Save benoittgt/ab72dc4cfedea2a0c6a5ee809d16e04d to your computer and use it in GitHub Desktop.
-- Carefull setup script is slow
-- SETUP SCRIPT :: START --
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
id SERIAL PRIMARY KEY,
type varchar(40) DEFAULT 'pdf' NOT NULL,
status varchar(40) NOT NULL,
sender_reference varchar(40) NOT NULL,
sent_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now() NOT NULL
);
INSERT INTO
docs (type, status, sender_reference, sent_at)
SELECT
('{pdf,doc,raw}'::text[])[ceil(random()*3)],
('{sent,draft,suspended}'::text[])[ceil(random()*3)],
('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000),
(LOCALTIMESTAMP - interval '2 years' * random())::timestamptz
FROM generate_series(1, 60000000) g;
CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST);
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST);
VACUUM(ANALYZE) docs;
-- SETUP SCRIPT :: END --
SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
/*
Limit (cost=0.56..26727.94 rows=20 width=38) (actual time=116.926..218.789 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=1421 read=45046
-> Index Scan using docs_sent_at_idx_1 on public.docs (cost=0.56..39748945.58 rows=29744 width=38) (actual time=116.924..218.784 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Filter: (((docs.status)::text = ANY ('{draft,sent}'::text[])) AND ((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])))
Rows Removed by Filter: 46354
Buffers: shared hit=1421 read=45046
Query Identifier: -4773896564660341574
Planning Time: 0.282 ms
Execution Time: 218.817 ms
*/
DROP INDEX docs_sent_at_idx_1;
-- redo the query without the index used
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
-- with warm cache, very expensive read, all rows returned then sorted
/*
Limit (cost=97013.42..97013.47 rows=20 width=38) (actual time=98.236..98.237 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=15 read=29030
-> Sort (cost=97013.42..97087.78 rows=29744 width=38) (actual time=98.234..98.235 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Sort Key: docs.sent_at DESC NULLS LAST
Sort Method: top-N heapsort Memory: 27kB
Buffers: shared hit=15 read=29030
-> Bitmap Heap Scan on public.docs (cost=524.25..96221.94 rows=29744 width=38) (actual time=22.013..95.987 rows=29834 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Recheck Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Heap Blocks: exact=28973
Buffers: shared hit=15 read=29030
-> Bitmap Index Scan on docs_sent_at_idx_3 (cost=0.00..516.81 rows=29744 width=0) (actual time=13.608..13.608 rows=29834 loops=1)
Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Buffers: shared hit=15 read=57
Query Identifier: -4773896564660341574
Planning Time: 0.312 ms
Execution Time: 98.320 ms
*/
-- Recreate previous deleted index
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
VACUUM(ANALYZE) docs;
-- Decompose using UNION ALL. Result will be a little bit different (I think because of sent_at insertion :thinking:)
-- but it is the same amount of docs than the previous query.
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT *
FROM
(
(
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Custom/1175'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Custom/1175'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Client/362'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Client/362'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Custom/280'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Custom/280'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
)
) docs
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0;
-- Fast query with very low usage of IO or disk read. Good usage of index.
/*
Limit (cost=3.47..84.20 rows=20 width=38) (actual time=4.340..4.398 rows=20 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Buffers: shared hit=36 read=13
-> Merge Append (cost=3.47..487.86 rows=120 width=38) (actual time=4.338..4.393 rows=20 loops=1)
Sort Key: docs_1.sent_at DESC NULLS LAST
Buffers: shared hit=36 read=13
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=2.081..2.101 rows=9 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Buffers: shared hit=9 read=4
-> Index Scan using docs_sent_at_idx on public.docs docs_1 (cost=0.56..19763.16 rows=4930 width=38) (actual time=2.079..2.098 rows=9 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Index Cond: (((docs_1.sender_reference)::text = 'Custom/1175'::text) AND ((docs_1.status)::text = 'draft'::text))
Buffers: shared hit=9 read=4
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.204..0.215 rows=6 loops=1)
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at
Buffers: shared hit=9 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_2 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.202..0.212 rows=6 loops=1)
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at
Index Cond: (((docs_2.sender_reference)::text = 'Custom/1175'::text) AND ((docs_2.status)::text = 'sent'::text))
Buffers: shared hit=9 read=1
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.755..0.756 rows=1 loops=1)
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at
Buffers: shared hit=2 read=3
-> Index Scan using docs_sent_at_idx on public.docs docs_3 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.754..0.754 rows=1 loops=1)
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at
Index Cond: (((docs_3.sender_reference)::text = 'Client/362'::text) AND ((docs_3.status)::text = 'draft'::text))
Buffers: shared hit=2 read=3
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.195..0.200 rows=3 loops=1)
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at
Buffers: shared hit=6 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_4 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.194..0.198 rows=3 loops=1)
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at
Index Cond: (((docs_4.sender_reference)::text = 'Client/362'::text) AND ((docs_4.status)::text = 'sent'::text))
Buffers: shared hit=6 read=1
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.888..0.897 rows=5 loops=1)
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at
Buffers: shared hit=6 read=3
-> Index Scan using docs_sent_at_idx on public.docs docs_5 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.887..0.895 rows=5 loops=1)
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at
Index Cond: (((docs_5.sender_reference)::text = 'Custom/280'::text) AND ((docs_5.status)::text = 'draft'::text))
Buffers: shared hit=6 read=3
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.207..0.208 rows=1 loops=1)
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at
Buffers: shared hit=4 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_6 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.206..0.206 rows=1 loops=1)
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at
Index Cond: (((docs_6.sender_reference)::text = 'Custom/280'::text) AND ((docs_6.status)::text = 'sent'::text))
Buffers: shared hit=4 read=1
Query Identifier: -998395605132531303
Planning:
Buffers: shared hit=5
Planning Time: 1.633 ms
Execution Time: 4.498 ms
*/
SELECT version();
-- PostgreSQL 14.8 (Homebrew) on aarch64-apple-darwin22.4.0, compiled by Apple clang version 14.0.3 (clang-1403.0.22.14.1), 64-bit
@andyatkinson
Copy link

Nice one @FranckPachot and thanks for writing this up @benoittgt!

@benoittgt
Copy link
Author

Hello

A new benchmark made on saop-dynamic-skip-v11.45. Same as initial post with this setup and query:

- Carefull setup script is slow
-- SETUP SCRIPT :: START --
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
  id SERIAL PRIMARY KEY,
  type varchar(40) DEFAULT 'pdf' NOT NULL,
  status varchar(40) NOT NULL,
  sender_reference varchar(40) NOT NULL,
  sent_at TIMESTAMPTZ,
  created_at TIMESTAMPTZ DEFAULT now() NOT NULL
);


INSERT INTO
  docs (type, status, sender_reference, sent_at)
SELECT
  ('{pdf,doc,raw}'::text[])[ceil(random()*3)],
  ('{sent,draft,suspended}'::text[])[ceil(random()*3)],
  ('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000),
  (LOCALTIMESTAMP - interval '2 years' * random())::timestamptz
FROM generate_series(1, 60000000) g;

CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST);
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status); 
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST);

VACUUM(ANALYZE) docs;
-- SETUP SCRIPT :: END --

SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed

EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;

result.. It's still very good

                                                                              QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------
 Limit  (cost=0.56..1415.02 rows=20 width=41) (actual time=2.508..10.596 rows=20 loops=1)
   Output: id, type, status, sender_reference, sent_at, created_at
   Buffers: shared hit=4 read=214
   ->  Index Scan using docs_sent_at_idx_1 on public.docs  (cost=0.56..2119638.18 rows=29971 width=41) (actual time=2.506..10.590 rows=20 loops=1)
         Output: id, type, status, sender_reference, sent_at, created_at
         Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
         Buffers: shared hit=4 read=214
 Planning:
   Buffers: shared hit=58
 Planning Time: 0.299 ms
 Execution Time: 10.617 ms
(11 rows)

There are many benchmarks in the Peter's contribution now. My benchmark is probably not really usefull now but for us (my company) it's good way to track the progress of the great contributions of Peter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment