Skip to content

Instantly share code, notes, and snippets.

@shaundomingo
Last active August 29, 2015 14:27
Show Gist options
  • Save shaundomingo/c6a8b9044ce2a3b52c22 to your computer and use it in GitHub Desktop.
Save shaundomingo/c6a8b9044ce2a3b52c22 to your computer and use it in GitHub Desktop.
Script to automate the cleanup of old, unnecessary data in cloud and cloud_usage tables
-- CLEAN UP RECORDS
DROP PROCEDURE IF EXISTS cloud.nf_destroy_historical_data;
delimiter //
CREATE PROCEDURE cloud.nf_destroy_historical_data(IN tableName VARCHAR(255), IN columnCriteria VARCHAR(255), IN batchSize INTEGER, IN intervalDays INTEGER, IN dryRun INTEGER, IN debugOn INTEGER)
BEGIN
SET @count_rows_query = CONCAT("SELECT count(id) INTO @table_row_count FROM ", tableName);
SET @min_id_query = CONCAT("SELECT MIN(id), MAX(id) INTO @a, @m FROM ", tableName, " WHERE ", columnCriteria, " < DATE_SUB(CURRENT_DATE(), INTERVAL ", intervalDays, " DAY)");
SET @get_batch_query = CONCAT("SELECT id INTO @z FROM ", tableName, " WHERE id >= @a ORDER BY id LIMIT ", batchSize, ",1");
SET @delete_batch_query = CONCAT("DELETE FROM ", tableName, " where id >= @a AND id < @z AND ", columnCriteria, " < DATE_SUB(CURRENT_DATE(), INTERVAL ", intervalDays, " DAY)");
SET @dry_run_delete_batch_query = CONCAT("SELECT COUNT(*) FROM ", tableName, " where id >= @a AND id < @z AND ", columnCriteria, " < DATE_SUB(CURRENT_DATE(), INTERVAL ", intervalDays, " DAY)");
SET @debug = debugOn;
/* Get the minimum ID from specified table */
PREPARE stmt_min FROM @min_id_query;
EXECUTE stmt_min;
DEALLOCATE PREPARE stmt_min;
SET @start_id = @a;
/* Get the total rows in table at start */
PREPARE stmt_count FROM @count_rows_query;
EXECUTE stmt_count;
DEALLOCATE PREPARE stmt_count;
/* Print start status */
SELECT CONCAT("Starting cleanup of table ", tableName) status_update, @table_row_count start_table_row_count;
/* Loop through resultset cleaning up any matching records */
SOMEATONCE: LOOP
SET @z = null;
SET @current_deleted = 0;
/* Get the first batch of records of size 'batchSize' */
PREPARE stmt_batch FROM @get_batch_query;
IF @debug = 1 THEN
SELECT @get_batch_query as query_to_run, @z, @a, batchSize;
END IF;
EXECUTE stmt_batch;
DEALLOCATE PREPARE stmt_batch;
/* Check if there's one more batch to go */
IF @z is null AND @a < @m THEN
SET @z = @m;
END IF;
IF @z is null AND @a = @start_id THEN
SELECT CONCAT(tableName, ": Nothing to delete with those parameters. Started at id=", @start_id) as status_update;
LEAVE SOMEATONCE;
ELSEIF @z is null AND @a is null THEN
SELECT CONCAT(tableName, ": Nothing to do. No records match the criteria provided.") as status_update;
LEAVE SOMEATONCE;
ELSEIF @z is null THEN
SET @totalAtStart = @table_row_count;
/* Get the total rows in table at end */
PREPARE stmt_count FROM @count_rows_query;
EXECUTE stmt_count;
DEALLOCATE PREPARE stmt_count;
SET @totalAtEnd = @table_row_count;
SET @totalDeleted = @totalAtStart - @totalAtEnd;
SELECT CONCAT(tableName, ": Finished. Deleted records in range ", @start_id, " through ", @a, " in table ", tableName, ". ", @totalDeleted, " records deleted.") as status_update, @totalAtStart as start_table_row_count, @table_row_count as end_table_row_count;
LEAVE SOMEATONCE;
ELSE
SELECT CONCAT(tableName, ": Deleting ", tableName, " records: ", @a, " through ", @z-1) as status_update;
END IF;
/* Delete all records in this batch */
SET @deleteQuery = "";
IF dryRun = 1 THEN
SET @deleteQuery = @dry_run_delete_batch_query;
ELSE
SET @deleteQuery = @delete_batch_query;
END IF;
PREPARE stmt_delete FROM @deleteQuery;
IF @debug = 1 THEN
SELECT @delete_batch_query as query_to_run, @a, @z, columnCriteria, intervalDays;
END IF;
EXECUTE stmt_delete;
DEALLOCATE PREPARE stmt_delete;
SET @a = @z;
/* If HA MySQL, give the slave some time to catch up */
PREPARE stmt_sleep FROM "SELECT SLEEP(1)";
EXECUTE stmt_sleep;
DEALLOCATE PREPARE stmt_sleep;
END LOOP;
END
//
/*
CALL cloud.nf_destroy_historical_data('cloud.alert', 'created', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud.event', 'created', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud.usage_event', 'created', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.cloud_usage', 'end_date', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_event', 'created', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_ip_address', 'released', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_job', 'end_date', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_load_balancer_policy', 'deleted', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_port_forwarding', 'deleted', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_storage', 'deleted', 2000, 90, 0, 0);
CALL cloud.nf_destroy_historical_data('cloud_usage.usage_volume', 'deleted', 2000, 90, 0, 0);
-- Doesn't work for tables below as they have a composite primary key
-- CALL cloud.nf_destroy_historical_data('cloud_usage.usage_network_offering', 'deleted', 2000, 90, 0, 0);
-- CALL cloud.nf_destroy_historical_data('cloud_usage.usage_security_group', 'deleted', 2000, 90, 0, 0);
-- CALL cloud.nf_destroy_historical_data('cloud_usage.usage_vm_instance', 'end_date', 2000, 90, 0, 0);
-- CALL cloud.nf_destroy_historical_data('cloud_usage.usage_vpn_user', 'deleted', 2000, 90, 0, 0);
*/
@shaundomingo
Copy link
Author

Example in-flight output:

+------------------------------------------------------------------------------------------+
| status_update                                                                            |
+------------------------------------------------------------------------------------------+
| cloud_usage.cloud_usage: Deleting cloud_usage.cloud_usage records: 248001 through 250000 |
+------------------------------------------------------------------------------------------+
1 row in set (2 min 31.09 sec)

Example final output:

+----------------------------------------------------------------------------------------------------------------------------------------+-----------------------+---------------------+
| status_update                                                                                                                          | start_table_row_count | end_table_row_count |
+----------------------------------------------------------------------------------------------------------------------------------------+-----------------------+---------------------+
| cloud_usage.cloud_usage: Finished. Deleted records in range 1 through 252001 in table cloud_usage.cloud_usage. 224789 records deleted. |                252593 |               27804 |
+----------------------------------------------------------------------------------------------------------------------------------------+-----------------------+---------------------+
1 row in set (2 min 33.10 sec)

If nothing matches the criteria:

+---------------------------------------------------------------------------------+
| status_update                                                                   |
+---------------------------------------------------------------------------------+
| cloud_usage.usage_event: Nothing to do. No records match the criteria provided. |
+---------------------------------------------------------------------------------+
1 row in set (0.01 sec)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment