Last active
April 30, 2024 02:37
-
-
Save BobPusateri/61fedc4408f0ba4a8362eb2e379cb148 to your computer and use it in GitHub Desktop.
Testing the performance of changing a column's datatype with and without data compression enabled
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This script accompanies the following blog post: | |
-- https://bobpusateri.com/2024/04/compression-datatype-change/ | |
-- Enable SSMS time & IO statistics | |
SET STATISTICS TIME ON; | |
SET STATISTICS IO ON; | |
-- create database, tables & populate them | |
CREATE DATABASE [TestDB] | |
ON PRIMARY | |
( NAME = N'TestDB', FILENAME = N'F:\data\TestDB.mdf' , SIZE = 1GB , FILEGROWTH = 1GB ) | |
LOG ON | |
( NAME = N'TestDB_log', FILENAME = N'G:\log\TestDB_log.ldf' , SIZE = 256MB , FILEGROWTH = 1GB ) | |
GO | |
ALTER DATABASE [TestDB] SET RECOVERY SIMPLE | |
GO | |
USE TestDB; | |
CREATE TABLE dbo.TheTable ( | |
Id INT IDENTITY(1,1) NOT NULL, | |
Payload CHAR(5400) NOT NULL | |
) | |
CREATE TABLE dbo.Numbers ( | |
i INT NOT NULL | |
); | |
WITH cteN(i) AS ( | |
SELECT ROW_NUMBER() OVER (ORDER BY s1.[object_id]) - 1 | |
FROM sys.all_columns AS s1 | |
CROSS JOIN sys.all_columns AS s2 | |
) | |
INSERT INTO dbo.Numbers(i) | |
SELECT i | |
FROM cteN | |
WHERE i <= 1000000 | |
INSERT INTO dbo.TheTable (Payload) | |
SELECT REPLICATE(CONVERT(CHAR(36), NEWID()), 150) | |
FROM dbo.Numbers n1 | |
CROSS JOIN dbo.Numbers n2 | |
WHERE n2.i < 2 | |
-- how big is the table? | |
SELECT | |
t.name AS TableName, | |
s.name AS SchemaName, | |
p.rows, | |
CAST(ROUND(((SUM(a.total_pages) * 8) / 1024.00), 2) AS NUMERIC(36, 2)) AS TotalSpaceMB, | |
CAST(ROUND(((SUM(a.used_pages) * 8) / 1024.00), 2) AS NUMERIC(36, 2)) AS UsedSpaceMB, | |
CAST(ROUND(((SUM(a.total_pages) - SUM(a.used_pages)) * 8) / 1024.00, 2) AS NUMERIC(36, 2)) AS UnusedSpaceMB | |
FROM | |
sys.tables t | |
INNER JOIN | |
sys.indexes i ON t.object_id = i.object_id | |
INNER JOIN | |
sys.partitions p ON i.object_id = p.object_id AND i.index_id = p.index_id | |
INNER JOIN | |
sys.allocation_units a ON p.partition_id = a.container_id | |
LEFT OUTER JOIN | |
sys.schemas s ON t.schema_id = s.schema_id | |
WHERE | |
t.is_ms_shipped = 0 | |
GROUP BY | |
t.name, s.name, p.rows | |
ORDER BY | |
TotalSpaceMB DESC, t.name; | |
-- how many pages does the table occupy? | |
SELECT * | |
FROM sys.dm_db_index_physical_stats (DB_ID('TestDB'), OBJECT_ID('dbo.TheTable'), NULL, NULL, 'SAMPLED') | |
-- set database to FULL recovery, take full & log backups | |
ALTER DATABASE TestDB SET RECOVERY FULL; | |
BACKUP DATABASE TestDB | |
TO DISK = 'D:\Backup\TestDB.bak' | |
WITH COMPRESSION, INIT, FORMAT; | |
BACKUP LOG TestDB | |
TO DISK = 'D:\Backup\TestDB_1.trn' | |
WITH COMPRESSION, INIT, FORMAT; | |
-- how many changes are in the log right now? (should be very small) | |
SELECT log_since_last_log_backup_mb | |
FROM sys.dm_db_log_stats (DB_ID('TestDB')) | |
-- now let's change the datatype of the Id column from INT to BIGINT | |
-- note how long this takes | |
ALTER TABLE dbo.TheTable ALTER COLUMN Id BIGINT | |
-- how many MB of changes occurred? Note this number for later | |
SELECT log_since_last_log_backup_mb | |
FROM sys.dm_db_log_stats (DB_ID('TestDB')) | |
-- Let's take another log backup and see how many pages it occupies. | |
-- Note this number for later as well. | |
BACKUP LOG TestDB | |
TO DISK = 'D:\Backup\TestDB_BigInt_1.trn' | |
WITH COMPRESSION, INIT, FORMAT; | |
-- Now we need to repeat the test, this time with compression enabled. | |
-- Restore the database from backup | |
USE master; | |
RESTORE DATABASE TestDB | |
FROM DISK = 'D:\Backup\TestDB.bak' | |
WITH REPLACE; | |
-- Now enable row compression on the table | |
USE TestDB; | |
ALTER TABLE dbo.TheTable REBUILD | |
WITH (DATA_COMPRESSION = ROW); | |
-- how many MB of log were generated? | |
SELECT log_since_last_log_backup_mb | |
FROM sys.dm_db_log_stats (DB_ID('TestDB')) | |
-- Take full & log backups | |
BACKUP DATABASE TestDB | |
TO DISK = 'D:\Backup\TestDB_RowCompress.bak' | |
WITH COMPRESSION, INIT, FORMAT; | |
BACKUP LOG TestDB | |
TO DISK = 'D:\Backup\TestDB_RowCompress_1.trn' | |
WITH COMPRESSION, INIT, FORMAT; | |
-- how many MB of log now? This should be small again. | |
SELECT log_since_last_log_backup_mb | |
FROM sys.dm_db_log_stats (DB_ID('TestDB')) | |
-- Now let's change Id column to BIGINT again. Note how long this takes. | |
ALTER TABLE dbo.TheTable ALTER COLUMN Id BIGINT | |
-- how many MB of log were created? | |
SELECT log_since_last_log_backup_mb | |
FROM sys.dm_db_log_stats (DB_ID('TestDB')) | |
-- Let's backup the log again. Note how many pages are backed up | |
BACKUP LOG TestDB | |
TO DISK = 'D:\Backup\TestDB_RowCompress_BigInt_1.trn' | |
WITH COMPRESSION, INIT, FORMAT; | |
-- How do these numbers and speeds compare to when compression wasn't enabled? | |
-- You should find it significantly faster with compression enabled. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment