Skip to content

Instantly share code, notes, and snippets.

@npwalker
Created March 1, 2016 16:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save npwalker/4ea81e5c1eba01535967 to your computer and use it in GitHub Desktop.
Save npwalker/4ea81e5c1eba01535967 to your computer and use it in GitHub Desktop.
PuppetDB GC Query Options For Bulk Deletion
-- report-ttl temp table implementation
SELECT id INTO TEMP TABLE report_ids_past_report_ttl
from reports r
where r.producer_timestamp < NOW() - INTERVAL '14 days';
ANALYZE report_ids_past_report_ttl;
DELETE from resource_events where report_id IN ( select id from report_ids_past_report_ttl );
UPDATE certnames SET latest_report_id = NULL where latest_report_id IN ( select id from report_ids_past_report_ttl );
DELETE FROM reports where id IN ( select id from report_ids_past_report_ttl );
DROP TABLE report_ids_past_report_ttl;
--report-ttl CTE implementation
WITH deleted_reports AS (
DELETE FROM reports r
WHERE r.producer_timestamp < NOW() - INTERVAL '14 days'
OR certname in ( SELECT certname FROM deleted_certnames )
RETURNING r.id
),
deleted_resource_events AS (
DELETE FROM resource_events re
WHERE report_id IN ( SELECT id FROM deleted_reports )
RETURNING re.report_id
)
UPDATE certnames
SET latest_report_id = NULL
WHERE latest_report_id IN ( SELECT report_id FROM deleted_resource_events );
--node-purge-ttl CTE implementation
EXPLAIN ANALYZE
WITH limited_certnames AS (
SELECT certname
FROM certnames
WHERE expired IS NOT NULL or deactivated IS NOT NULL
LIMIT 10
),
deleted_certnames AS (
DELETE FROM certnames
WHERE certname IN ( SELECT certname FROM limited_certnames )
RETURNING certname
),
deleted_factsets AS (
DELETE FROM factsets
WHERE certname IN ( SELECT certname FROM deleted_certnames )
RETURNING id
),
deleted_facts AS (
DELETE from facts
WHERE factset_id IN ( SELECT id FROM deleted_factsets )
RETURNING factset_id
),
deleted_catalogs AS (
DELETE FROM catalogs
WHERE certname IN ( SELECT certname FROM deleted_certnames )
RETURNING id
),
deleted_catalog_resources AS (
DELETE FROM catalog_resources
WHERE catalog_id IN ( SELECT id FROM deleted_catalogs )
RETURNING catalog_id
),
deleted_reports AS (
DELETE FROM reports
WHERE certname IN ( SELECT certname FROM deleted_certnames );
)
DELETE FROM resource_events
WHERE report_id IN ( SELECT id FROM deleted_reports )
-- node-purge-ttl Temp Table Implementation
SELECT certname
INTO TEMPORARY TABLE certnames_past_node_ttl
FROM certnames
WHERE ( expired IS NOT NULL OR deactivated IS NOT NULL );
ANALYZE certnames_past_node_ttl;
SELECT id
INTO TEMPORARY TABLE factsets_to_delete
FROM factsets
WHERE certname IN ( SELECT certname FROM certnames_past_node_ttl );
ANALYZE factsets_to_delete;
DELETE FROM facts
WHERE factset_id IN ( SELECT id FROM factsets_to_delete );
DELETE FROM factsets
WHERE id IN ( SELECT id FROM factsets_to_delete );
DROP TABLE factsets_to_delete;
SELECT id
INTO TEMPORARY TABLE catalogs_to_delete
FROM catalogs
WHERE certname IN ( SELECT certname FROM certnames_past_node_ttl );
ANALYZE catalogs_to_delete;
DELETE FROM catalog_resources
WHERE catalog_id IN ( SELECT id FROM catalogs_to_delete );
DELETE FROM catalogs
WHERE id IN ( SELECT id FROM catalogs_to_delete );
DROP TABLE catalogs_to_delete;
DELETE FROM reports
WHERE certname IN ( SELECT certname FROM certnames_past_node_ttl );
##uggh need to do the same logic here as in report-ttl to delete from resource_events and update certnames
DELETE FROM certnames
WHERE certname IN ( SELECT certname FROM certnames_past_node_ttl);
DROP TABLE certnames_past_node_ttl;
--Attempt At combining node-purge-ttl with report-ttl
--This top section only runs if node-purge-ttl is enabled
--So we put the certnames into a temp table that can be utilized by report-ttl
--If this never runs then we'll make an empty temp table for report-ttl
SELECT certname
INTO certnames_to_delete
FROM certnames
WHERE expired IS NOT NULL OR deactivated IS NOT NULL;
-- We could add a LIMIT here and allow users to configure how many certnames to delete each gc run
-- This means you could prevent a long running delete when you decide to purge hundreds of nodes at once
ANALYZE certnames_to_delete;
EXPLAIN ANALYZE
WITH deleted_certnames AS (
DELETE FROM certnames
WHERE certname IN ( SELECT certname FROM certnames_to_delete )
RETURNING certname
),
deleted_factsets AS (
DELETE FROM factsets
WHERE certname IN ( SELECT certname FROM deleted_certnames )
RETURNING id
),
deleted_facts AS (
DELETE from facts
WHERE factset_id IN ( SELECT id FROM deleted_factsets )
RETURNING factset_id
),
deleted_catalogs AS (
DELETE FROM catalogs
WHERE certname IN ( SELECT certname FROM deleted_certnames )
RETURNING id
)
DELETE FROM catalog_resources
WHERE catalog_id IN ( SELECT id FROM deleted_catalogs );
--hmmm hypothetically we should drop the temp table here but that'd need to be
--determined by whether report-ttl is enabled or not
--a temp table will be dropped as a result of a closed session but I'm not sure
--how the connection pooling works
CREATE TEMPORARY TABLE IF NOT EXISTS certnames_to_delete (
certname text
);
WITH deleted_reports AS (
DELETE FROM reports r
WHERE r.producer_timestamp < NOW() - INTERVAL '14 days'
OR certname IN ( SELECT certname FROM certnames_to_delete )
RETURNING r.id
),
deleted_resource_events AS (
DELETE FROM resource_events re
WHERE report_id IN ( SELECT id FROM deleted_reports )
RETURNING re.report_id
)
UPDATE certnames
SET latest_report_id = NULL
WHERE latest_report_id IN ( SELECT report_id FROM deleted_resource_events );
DROP TABLE certnames_to_delete;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment