-- Testing the performance of changing a column's datatype with and without data compression enabled
-- This script accompanies the following blog post:
-- https://bobpusateri.com/2024/04/compression-datatype-change/
-- Enable SSMS time & IO statistics
SET STATISTICS TIME ON;
SET STATISTICS IO ON;
-- create database, tables & populate them
CREATE DATABASE [TestDB]
ON PRIMARY
( NAME = N'TestDB', FILENAME = N'F:\data\TestDB.mdf' , SIZE = 1GB , FILEGROWTH = 1GB )
LOG ON
( NAME = N'TestDB_log', FILENAME = N'G:\log\TestDB_log.ldf' , SIZE = 256MB , FILEGROWTH = 1GB )
GO
ALTER DATABASE [TestDB] SET RECOVERY SIMPLE
GO
USE TestDB;
CREATE TABLE dbo.TheTable (
Id INT IDENTITY(1,1) NOT NULL,
Payload CHAR(5400) NOT NULL
)
CREATE TABLE dbo.Numbers (
i INT NOT NULL
);
WITH cteN(i) AS (
SELECT ROW_NUMBER() OVER (ORDER BY s1.[object_id]) - 1
FROM sys.all_columns AS s1
CROSS JOIN sys.all_columns AS s2
)
INSERT INTO dbo.Numbers(i)
SELECT i
FROM cteN
WHERE i <= 1000000
INSERT INTO dbo.TheTable (Payload)
SELECT REPLICATE(CONVERT(CHAR(36), NEWID()), 150)
FROM dbo.Numbers n1
CROSS JOIN dbo.Numbers n2
WHERE n2.i < 2
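-- (Optional sanity check, not part of the original post: the cross join above
-- should load roughly 2 million rows, i.e. the 1,000,001 rows of n1 times the
-- two rows of n2 where i < 2. A quick count confirms the load before measuring.)
SELECT COUNT(*) AS RowsLoaded
FROM dbo.TheTable;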
-- how big is the table?
SELECT
t.name AS TableName,
s.name AS SchemaName,
p.rows,
CAST(ROUND(((SUM(a.total_pages) * 8) / 1024.00), 2) AS NUMERIC(36, 2)) AS TotalSpaceMB,
CAST(ROUND(((SUM(a.used_pages) * 8) / 1024.00), 2) AS NUMERIC(36, 2)) AS UsedSpaceMB,
CAST(ROUND(((SUM(a.total_pages) - SUM(a.used_pages)) * 8) / 1024.00, 2) AS NUMERIC(36, 2)) AS UnusedSpaceMB
FROM
sys.tables t
INNER JOIN
sys.indexes i ON t.object_id = i.object_id
INNER JOIN
sys.partitions p ON i.object_id = p.object_id AND i.index_id = p.index_id
INNER JOIN
sys.allocation_units a ON p.partition_id = a.container_id
LEFT OUTER JOIN
sys.schemas s ON t.schema_id = s.schema_id
WHERE
t.is_ms_shipped = 0
GROUP BY
t.name, s.name, p.rows
ORDER BY
TotalSpaceMB DESC, t.name;
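-- (Optional alternative, not in the original post: sp_spaceused returns the same
-- reserved/data/unused figures for a single table with far less typing.)
EXEC sys.sp_spaceused @objname = N'dbo.TheTable';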
-- how many pages does the table occupy?
SELECT *
FROM sys.dm_db_index_physical_stats (DB_ID('TestDB'), OBJECT_ID('dbo.TheTable'), NULL, NULL, 'SAMPLED')
-- set database to FULL recovery, take full & log backups
ALTER DATABASE TestDB SET RECOVERY FULL;
BACKUP DATABASE TestDB
TO DISK = 'D:\Backup\TestDB.bak'
WITH COMPRESSION, INIT, FORMAT;
BACKUP LOG TestDB
TO DISK = 'D:\Backup\TestDB_1.trn'
WITH COMPRESSION, INIT, FORMAT;
-- how many changes are in the log right now? (should be very small)
SELECT log_since_last_log_backup_mb
FROM sys.dm_db_log_stats (DB_ID('TestDB'))
-- now let's change the datatype of the Id column from INT to BIGINT
-- note how long this takes
ALTER TABLE dbo.TheTable ALTER COLUMN Id BIGINT
-- how many MB of changes occurred? Note this number for later
SELECT log_since_last_log_backup_mb
FROM sys.dm_db_log_stats (DB_ID('TestDB'))
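-- (Optional sanity check, not in the original post: confirm the metadata now
-- reports the Id column as bigint.)
SELECT c.name AS ColumnName,
    TYPE_NAME(c.user_type_id) AS DataType,
    c.max_length
FROM sys.columns AS c
WHERE c.[object_id] = OBJECT_ID('dbo.TheTable')
    AND c.name = 'Id';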
-- Let's take another log backup and see how many pages it occupies.
-- Note this number for later as well.
BACKUP LOG TestDB
TO DISK = 'D:\Backup\TestDB_BigInt_1.trn'
WITH COMPRESSION, INIT, FORMAT;
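-- (Optional, not in the original post: rather than reading the backup output by
-- hand, msdb's backup history shows the size of each backup taken above. The log
-- backup size should roughly track the log MB reported by sys.dm_db_log_stats.)
SELECT bs.backup_finish_date,
    bs.[type] AS BackupType, -- D = database, L = log
    CAST(bs.backup_size / 1048576.0 AS NUMERIC(18,2)) AS BackupSizeMB,
    CAST(bs.compressed_backup_size / 1048576.0 AS NUMERIC(18,2)) AS CompressedSizeMB
FROM msdb.dbo.backupset AS bs
WHERE bs.database_name = 'TestDB'
ORDER BY bs.backup_finish_date;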
-- Now we need to repeat the test, this time with compression enabled.
-- Restore the database from backup
USE master;
RESTORE DATABASE TestDB
FROM DISK = 'D:\Backup\TestDB.bak'
WITH REPLACE;
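-- (Optional sanity check, not in the original post: the restored copy should come
-- back in FULL recovery, since the full backup was taken after switching recovery
-- models. The log measurements below depend on that.)
SELECT name, recovery_model_desc
FROM sys.databases
WHERE name = 'TestDB';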
-- Now enable row compression on the table
USE TestDB;
ALTER TABLE dbo.TheTable REBUILD
WITH (DATA_COMPRESSION = ROW);
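-- (Optional sanity check, not in the original post: confirm the heap now reports
-- ROW compression. Re-running the table size query from earlier also shows how
-- much smaller the compressed table is.)
SELECT p.index_id, p.partition_number, p.data_compression_desc, p.[rows]
FROM sys.partitions AS p
WHERE p.[object_id] = OBJECT_ID('dbo.TheTable');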
-- how many MB of log were generated?
SELECT log_since_last_log_backup_mb
FROM sys.dm_db_log_stats (DB_ID('TestDB'))
-- Take full & log backups
BACKUP DATABASE TestDB
TO DISK = 'D:\Backup\TestDB_RowCompress.bak'
WITH COMPRESSION, INIT, FORMAT;
BACKUP LOG TestDB
TO DISK = 'D:\Backup\TestDB_RowCompress_1.trn'
WITH COMPRESSION, INIT, FORMAT;
-- how many MB of log now? This should be small again.
SELECT log_since_last_log_backup_mb
FROM sys.dm_db_log_stats (DB_ID('TestDB'))
-- Now let's change Id column to BIGINT again. Note how long this takes.
ALTER TABLE dbo.TheTable ALTER COLUMN Id BIGINT
-- how many MB of log were created?
SELECT log_since_last_log_backup_mb
FROM sys.dm_db_log_stats (DB_ID('TestDB'))
-- Let's back up the log again. Note how many pages are backed up.
BACKUP LOG TestDB
TO DISK = 'D:\Backup\TestDB_RowCompress_BigInt_1.trn'
WITH COMPRESSION, INIT, FORMAT;
-- How do these numbers and times compare to the run without compression?
-- You should find the datatype change is significantly faster, and generates far
-- less transaction log, with row compression enabled.
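-- (Optional, not in the original post: the msdb backup history query shown earlier
-- can be re-run here to compare the two log backup sizes side by side. Once the
-- results are recorded, the test database can be dropped.)
USE master;
ALTER DATABASE TestDB SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
DROP DATABASE TestDB;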