Skip to content

Instantly share code, notes, and snippets.

@benoittgt
Last active November 16, 2021 16:18
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benoittgt/c89d8d84a252f9c4f52bc22e7ad6624d to your computer and use it in GitHub Desktop.
Save benoittgt/c89d8d84a252f9c4f52bc22e7ad6624d to your computer and use it in GitHub Desktop.
--- Setup
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
DROP TABLE IF EXISTS planner_checksums CASCADE;
CREATE TABLE IF NOT EXISTS planner_checksums (
identifiers_checksum uuid,
attributes_checksum uuid,
version INT
);
CREATE INDEX planner_checksums_idx ON planner_checksums(identifiers_checksum);
CREATE INDEX planner_checksums_idx_2 ON planner_checksums_dup(version, identifiers_checksum, attributes_checksum);
INSERT INTO planner_checksums (identifiers_checksum, attributes_checksum, version)
SELECT uuid_generate_v4(), uuid_generate_v4(), 1636387210
FROM generate_series(1, 9000000);
INSERT INTO planner_checksums (identifiers_checksum, attributes_checksum, version)
SELECT identifiers_checksum, attributes_checksum, 1636387212
FROM planner_checksums
ORDER BY 1 DESC
LIMIT 100000;
INSERT INTO planner_checksums (identifiers_checksum, attributes_checksum, version)
SELECT identifiers_checksum, attributes_checksum, 1636387212
FROM planner_checksums
ORDER BY 1 ASC
LIMIT 100000;
INSERT INTO planner_checksums (identifiers_checksum, attributes_checksum, version)
SELECT identifiers_checksum, attributes_checksum, 1636387212
FROM planner_checksums
ORDER BY 1 DESC
LIMIT 200;
-- End setup
-- 1. Initial query
BEGIN;
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) DELETE FROM planner_checksums WHERE identifiers_checksum IN (
SELECT identifiers_checksum FROM "planner_checksums"
GROUP BY "identifiers_checksum", "attributes_checksum"
HAVING COUNT(*) > 1
);
-- Time: 42.5s
-- https://explain.dalibo.com/plan/PJd
ROLLBACK;
-- 2. GROUP BY and HAVING and CTE
BEGIN;
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) WITH planner_checksums_dup AS (
SELECT identifiers_checksum
FROM "planner_checksums"
GROUP BY "identifiers_checksum", "attributes_checksum"
HAVING COUNT(*) > 1
)
DELETE FROM planner_checksums
USING planner_checksums_dup
WHERE planner_checksums_dup.identifiers_checksum = planner_checksums.identifiers_checksum;
-- Time: 125.7s
-- https://explain.dalibo.com/plan/cNT
ROLLBACK;
--
-- 3. SELF JOIN
BEGIN;
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) DELETE FROM planner_checksums pc1
USING planner_checksums pc2
WHERE pc2.version != pc1.version
AND pc2.identifiers_checksum = pc1.identifiers_checksum
AND pc2.attributes_checksum = pc1.attributes_checksum;
-- Time: 264.6s
-- https://explain.dalibo.com/plan/359
ROLLBACK;
--
-- 4. CTE and ROW NUMBER
BEGIN;
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS, FORMAT JSON) WITH planner_checksums_dup_row AS (
SELECT identifiers_checksum, ROW_NUMBER() OVER (PARTITION BY identifiers_checksum, attributes_checksum)
FROM "planner_checksums"
),
planner_checksums_dup_v AS (
SELECT * FROM planner_checksums_dup_row WHERE row_number > 1
)
DELETE FROM planner_checksums
USING planner_checksums_dup_v
WHERE planner_checksums_dup_v.identifiers_checksum = planner_checksums.identifiers_checksum;
-- Time: 24.9s ---- THE WINNER
-- https://explain.dalibo.com/plan/Pze
ROLLBACK;
--
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment