Skip to content

Instantly share code, notes, and snippets.

@io7m
Created February 9, 2022 15:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save io7m/899f982fcafa1f9e21cafe399155527d to your computer and use it in GitHub Desktop.
Save io7m/899f982fcafa1f9e21cafe399155527d to your computer and use it in GitHub Desktop.
SELECT count(*) AS count_1
FROM (SELECT works.id AS works_id,
works.presentation_edition_id AS works_presentation_edition_id,
works.audience AS works_audience,
works.target_age AS works_target_age,
works.fiction AS works_fiction,
works.summary_id AS works_summary_id,
works.summary_text AS works_summary_text,
works.quality AS works_quality,
works.rating AS works_rating,
works.popularity AS works_popularity,
works.primary_appeal AS works_primary_appeal,
works.secondary_appeal AS works_secondary_appeal,
works.appeal_character AS works_appeal_character,
works.appeal_language AS works_appeal_language,
works.appeal_setting AS works_appeal_setting,
works.appeal_story AS works_appeal_story,
works.last_update_time AS works_last_update_time,
works.presentation_ready AS works_presentation_ready,
works.presentation_ready_attempt AS works_presentation_ready_attempt,
works.presentation_ready_exception AS works_presentation_ready_exception,
works.simple_opds_entry AS works_simple_opds_entry,
works.verbose_opds_entry AS works_verbose_opds_entry,
works.marc_record AS works_marc_record
FROM works LEFT OUTER JOIN workcoveragerecords
ON works.id = workcoveragerecords.work_id
AND workcoveragerecords.operation = 'update-search-index'
WHERE (workcoveragerecords.id IS NULL OR workcoveragerecords.status NOT IN ('success', 'persistent failure'))
AND works.presentation_ready = true
ORDER BY works.id, workcoveragerecords.id
LIMIT 1000000 OFFSET 0) AS anon_1
;
-- Aggregate (cost=5612.73..5612.74 rows=1 width=8) (actual time=35.427..35.431 rows=1 loops=1)
-- -> Limit (cost=5578.45..5584.17 rows=2285 width=326) (actual time=35.420..35.424 rows=0 loops=1)
-- -> Sort (cost=5578.45..5584.17 rows=2285 width=326) (actual time=35.418..35.422 rows=0 loops=1)
-- Sort Key: works.id, workcoveragerecords.id
-- Sort Method: quicksort Memory: 25kB
-- -> Hash Left Join (cost=2106.16..5450.97 rows=2285 width=326) (actual time=35.411..35.414 rows=0 loops=1)
-- Hash Cond: (works.id = workcoveragerecords.work_id)
-- Filter: ((workcoveragerecords.id IS NULL) OR (workcoveragerecords.status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- -> Seq Scan on works (cost=0.00..3304.09 rows=15509 width=4) (actual time=0.029..12.246 rows=15509 loops=1)
-- Filter: presentation_ready
-- -> Hash (cost=1946.33..1946.33 rows=12786 width=12) (actual time=14.502..14.503 rows=15509 loops=1)
-- Buckets: 16384 Batches: 1 Memory Usage: 795kB
-- -> Bitmap Heap Scan on workcoveragerecords (cost=667.51..1946.33 rows=12786 width=12) (actual time=2.926..8.617 rows=15509 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.826..2.826 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- Planning Time: 0.796 ms
-- Execution Time: 35.530 ms
--(20 rows)
-- -----------------------------------------------------------------------------------------------------------------
--
-- Remove the outer count() as a first step to make sure we're measuring the inner query.
--
SELECT
works.id AS works_id,
works.presentation_edition_id AS works_presentation_edition_id,
works.audience AS works_audience,
works.target_age AS works_target_age,
works.fiction AS works_fiction,
works.summary_id AS works_summary_id,
works.summary_text AS works_summary_text,
works.quality AS works_quality,
works.rating AS works_rating,
works.popularity AS works_popularity,
works.primary_appeal AS works_primary_appeal,
works.secondary_appeal AS works_secondary_appeal,
works.appeal_character AS works_appeal_character,
works.appeal_language AS works_appeal_language,
works.appeal_setting AS works_appeal_setting,
works.appeal_story AS works_appeal_story,
works.last_update_time AS works_last_update_time,
works.presentation_ready AS works_presentation_ready,
works.presentation_ready_attempt AS works_presentation_ready_attempt,
works.presentation_ready_exception AS works_presentation_ready_exception,
works.simple_opds_entry AS works_simple_opds_entry,
works.verbose_opds_entry AS works_verbose_opds_entry,
works.marc_record AS works_marc_record
FROM works LEFT OUTER JOIN workcoveragerecords
ON works.id = workcoveragerecords.work_id
AND workcoveragerecords.operation = 'update-search-index'
WHERE (workcoveragerecords.id IS NULL OR workcoveragerecords.status NOT IN ('success', 'persistent failure'))
AND works.presentation_ready = true
ORDER BY works.id, workcoveragerecords.id
LIMIT 1000000 OFFSET 0
;
-- Limit (cost=5578.45..5584.17 rows=2285 width=1478) (actual time=34.156..34.160 rows=0 loops=1)
-- -> Sort (cost=5578.45..5584.17 rows=2285 width=1478) (actual time=34.154..34.157 rows=0 loops=1)
-- Sort Key: works.id, workcoveragerecords.id
-- Sort Method: quicksort Memory: 25kB
-- -> Hash Left Join (cost=2106.16..5450.97 rows=2285 width=1478) (actual time=34.147..34.150 rows=0 loops=1)
-- Hash Cond: (works.id = workcoveragerecords.work_id)
-- Filter: ((workcoveragerecords.id IS NULL) OR (workcoveragerecords.status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- -> Seq Scan on works (cost=0.00..3304.09 rows=15509 width=1474) (actual time=0.008..12.233 rows=15509 loops=1)
-- Filter: presentation_ready
-- -> Hash (cost=1946.33..1946.33 rows=12786 width=12) (actual time=13.517..13.519 rows=15509 loops=1)
-- Buckets: 16384 Batches: 1 Memory Usage: 795kB
-- -> Bitmap Heap Scan on workcoveragerecords (cost=667.51..1946.33 rows=12786 width=12) (actual time=2.281..7.816 rows=15509 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.215..2.215 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- Planning Time: 0.650 ms
-- Execution Time: 34.344 ms
-- -----------------------------------------------------------------------------------------------------------------
-- Remove the limit and offset. This doesn't change much, but simplifies the analysis a bit.
--
SELECT
works.id AS works_id,
works.presentation_edition_id AS works_presentation_edition_id,
works.audience AS works_audience,
works.target_age AS works_target_age,
works.fiction AS works_fiction,
works.summary_id AS works_summary_id,
works.summary_text AS works_summary_text,
works.quality AS works_quality,
works.rating AS works_rating,
works.popularity AS works_popularity,
works.primary_appeal AS works_primary_appeal,
works.secondary_appeal AS works_secondary_appeal,
works.appeal_character AS works_appeal_character,
works.appeal_language AS works_appeal_language,
works.appeal_setting AS works_appeal_setting,
works.appeal_story AS works_appeal_story,
works.last_update_time AS works_last_update_time,
works.presentation_ready AS works_presentation_ready,
works.presentation_ready_attempt AS works_presentation_ready_attempt,
works.presentation_ready_exception AS works_presentation_ready_exception,
works.simple_opds_entry AS works_simple_opds_entry,
works.verbose_opds_entry AS works_verbose_opds_entry,
works.marc_record AS works_marc_record
FROM works LEFT OUTER JOIN workcoveragerecords
ON works.id = workcoveragerecords.work_id
AND workcoveragerecords.operation = 'update-search-index'
WHERE (workcoveragerecords.id IS NULL OR workcoveragerecords.status NOT IN ('success', 'persistent failure'))
AND works.presentation_ready = true
ORDER BY works.id, workcoveragerecords.id
;
-- Sort (cost=5578.45..5584.17 rows=2285 width=1478) (actual time=35.586..35.590 rows=0 loops=1)
-- Sort Key: works.id, workcoveragerecords.id
-- Sort Method: quicksort Memory: 25kB
-- -> Hash Left Join (cost=2106.16..5450.97 rows=2285 width=1478) (actual time=35.579..35.582 rows=0 loops=1)
-- Hash Cond: (works.id = workcoveragerecords.work_id)
-- Filter: ((workcoveragerecords.id IS NULL) OR (workcoveragerecords.status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- -> Seq Scan on works (cost=0.00..3304.09 rows=15509 width=1474) (actual time=0.008..12.502 rows=15509 loops=1)
-- Filter: presentation_ready
-- -> Hash (cost=1946.33..1946.33 rows=12786 width=12) (actual time=14.123..14.124 rows=15509 loops=1)
-- Buckets: 16384 Batches: 1 Memory Usage: 795kB
-- -> Bitmap Heap Scan on workcoveragerecords (cost=667.51..1946.33 rows=12786 width=12) (actual time=2.666..8.116 rows=15509 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.601..2.601 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- Planning Time: 0.704 ms
-- Execution Time: 35.787 ms
-- -----------------------------------------------------------------------------------------------------------------
-- Eliminate sorting.
-- Note that now the query takes the same time in the average case, but the lower bound on the cost has
-- been reduced by half. If consumers aren't depending on ordering, that could pay off later.
--
SELECT works.id AS works_id,
works.presentation_edition_id AS works_presentation_edition_id,
works.audience AS works_audience,
works.target_age AS works_target_age,
works.fiction AS works_fiction,
works.summary_id AS works_summary_id,
works.summary_text AS works_summary_text,
works.quality AS works_quality,
works.rating AS works_rating,
works.popularity AS works_popularity,
works.primary_appeal AS works_primary_appeal,
works.secondary_appeal AS works_secondary_appeal,
works.appeal_character AS works_appeal_character,
works.appeal_language AS works_appeal_language,
works.appeal_setting AS works_appeal_setting,
works.appeal_story AS works_appeal_story,
works.last_update_time AS works_last_update_time,
works.presentation_ready AS works_presentation_ready,
works.presentation_ready_attempt AS works_presentation_ready_attempt,
works.presentation_ready_exception AS works_presentation_ready_exception,
works.simple_opds_entry AS works_simple_opds_entry,
works.verbose_opds_entry AS works_verbose_opds_entry,
works.marc_record AS works_marc_record
FROM works LEFT OUTER JOIN workcoveragerecords
ON works.id = workcoveragerecords.work_id
AND workcoveragerecords.operation = 'update-search-index'
WHERE (workcoveragerecords.id IS NULL OR workcoveragerecords.status NOT IN ('success', 'persistent failure'))
AND works.presentation_ready = true
;
-- Hash Left Join (cost=2106.16..5450.97 rows=2285 width=1474) (actual time=34.755..34.758 rows=0 loops=1)
-- Hash Cond: (works.id = workcoveragerecords.work_id)
-- Filter: ((workcoveragerecords.id IS NULL) OR (workcoveragerecords.status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- -> Seq Scan on works (cost=0.00..3304.09 rows=15509 width=1474) (actual time=0.009..12.361 rows=15509 loops=1)
-- Filter: presentation_ready
-- -> Hash (cost=1946.33..1946.33 rows=12786 width=12) (actual time=13.844..13.845 rows=15509 loops=1)
-- Buckets: 16384 Batches: 1 Memory Usage: 795kB
-- -> Bitmap Heap Scan on workcoveragerecords (cost=667.51..1946.33 rows=12786 width=12) (actual time=2.304..7.755 rows=15509 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.239..2.239 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- Planning Time: 0.625 ms
-- Execution Time: 34.827 ms
-- -----------------------------------------------------------------------------------------------------------------
-- Eliminate unused columns. We know we don't need most of them for counting query rows, so let's reduce
-- the cost the planner estimates in terms of I/O.
--
SELECT
works.id AS works_id,
works.presentation_ready AS works_presentation_ready
FROM works LEFT OUTER JOIN workcoveragerecords
ON works.id = workcoveragerecords.work_id
AND workcoveragerecords.operation = 'update-search-index'
WHERE (workcoveragerecords.id IS NULL OR workcoveragerecords.status NOT IN ('success', 'persistent failure'))
AND works.presentation_ready = true
;
-----------------------------------------------------------------------------------------------------------------------------------------------------------------
-- Hash Left Join (cost=2106.16..5450.97 rows=2285 width=5) (actual time=34.801..34.804 rows=0 loops=1)
-- Hash Cond: (works.id = workcoveragerecords.work_id)
-- Filter: ((workcoveragerecords.id IS NULL) OR (workcoveragerecords.status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- -> Seq Scan on works (cost=0.00..3304.09 rows=15509 width=5) (actual time=0.008..12.507 rows=15509 loops=1)
-- Filter: presentation_ready
-- -> Hash (cost=1946.33..1946.33 rows=12786 width=12) (actual time=13.341..13.343 rows=15509 loops=1)
-- Buckets: 16384 Batches: 1 Memory Usage: 795kB
-- -> Bitmap Heap Scan on workcoveragerecords (cost=667.51..1946.33 rows=12786 width=12) (actual time=2.295..7.744 rows=15509 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.224..2.224 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- Planning Time: 0.646 ms
-- Execution Time: 34.870 ms
-- -----------------------------------------------------------------------------------------------------------------
-- Use an INNER JOIN instead of a left join. This drastically reduces costs by eliminating the costly LEFT JOIN node.
--
SELECT
works.id AS works_id,
works.presentation_ready AS works_presentation_ready
FROM works INNER JOIN workcoveragerecords
ON works.id = workcoveragerecords.work_id
AND workcoveragerecords.operation = 'update-search-index'
WHERE (workcoveragerecords.id IS NULL OR workcoveragerecords.status NOT IN ('success', 'persistent failure'))
AND works.presentation_ready = true
;
-- Hash Join (cost=1999.12..5343.94 rows=1884 width=5) (actual time=7.245..7.250 rows=0 loops=1)
-- Hash Cond: (works.id = workcoveragerecords.work_id)
-- -> Seq Scan on works (cost=0.00..3304.09 rows=15509 width=5) (actual time=0.008..0.009 rows=1 loops=1)
-- Filter: presentation_ready
-- -> Hash (cost=1975.57..1975.57 rows=1884 width=4) (actual time=7.230..7.232 rows=0 loops=1)
-- Buckets: 2048 Batches: 1 Memory Usage: 16kB
-- -> Bitmap Heap Scan on workcoveragerecords (cost=664.78..1975.57 rows=1884 width=4) (actual time=7.229..7.230 rows=0 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Filter: ((id IS NULL) OR (status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.208..2.209 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- Planning Time: 0.620 ms
-- Execution Time: 7.305 ms
-- -----------------------------------------------------------------------------------------------------------------
-- Eliminate the sequential scan on the works database with an index on the ID and presentation_ready state.
--
create index test_works_index on works using btree (presentation_ready, id);
-- Nested Loop (cost=665.07..3842.19 rows=1884 width=5) (actual time=7.366..7.368 rows=0 loops=1)
-- -> Bitmap Heap Scan on workcoveragerecords (cost=664.78..1975.57 rows=1884 width=4) (actual time=7.365..7.366 rows=0 loops=1)
-- Recheck Cond: ((operation)::text = 'update-search-index'::text)
-- Filter: ((id IS NULL) OR (status <> ALL ('{success,"persistent failure"}'::coverage_status[])))
-- Rows Removed by Filter: 15509
-- Heap Blocks: exact=287
-- -> Bitmap Index Scan on ix_workcoveragerecords_operation (cost=0.00..664.31 rows=12786 width=0) (actual time=2.209..2.209 rows=15509 loops=1)
-- Index Cond: ((operation)::text = 'update-search-index'::text)
-- -> Index Only Scan using test_works_index on works (cost=0.29..0.99 rows=1 width=5) (never executed)
-- Index Cond: ((presentation_ready = true) AND (id = workcoveragerecords.work_id))
-- Heap Fetches: 0
-- Planning Time: 0.738 ms
-- Execution Time: 7.417 ms
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment