Last active
March 3, 2022 21:29
-
-
Save chris-mcdonald-dev/ba32653a61386b0675b0d8e61d2d56ad to your computer and use it in GitHub Desktop.
Finds columns that qualify for storage optimizations of at least 40% across entire database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- WHAT THIS DOES: | |
-- By default, NULL values take up storage as well. | |
-- Microsoft recommends considering a feature called "sparse columns" that reduces the size of NULL values in that column. | |
-- This only works well for columns with a large amount of NULL values. This script does this process for you. | |
-- DIRECTIONS ON HOW TO USE: | |
-- 1. Simply run the script initially to view all columns in your database that Microsoft recommends could benefit from being a "sparse column" | |
-- 2. Lastly, uncomment "Step #3", and run just that portion to actually make the changes to all columns that qualify. | |
--- NOTES: | |
-- To find Sparse Columns that no longer have enough NULL values, change the following | |
-- "PercentOfColumnNull >=" --------> "PercentOfColumnNull <=" | |
-- "@wantToFindSparseColumns BIT = 0" --------> "@wantToFindSparseColumns BIT = 1" | |
-- STEP #1 | |
-----------Find all columns in DB that have NULL values and list the percentage of NULL values in each--------- | |
-- Create Temporary Procedure to get what percentage of the column NULL values are taking up. | |
DROP PROCEDURE IF EXISTS #sp_GetPercentOfColumnNull | |
GO | |
CREATE PROCEDURE #sp_GetPercentOfColumnNull ( | |
@FullyQualifiedTableName VARCHAR(500), | |
@ColumnName VARCHAR(100), | |
@PercentOfColumnNull VARCHAR(10) OUTPUT, | |
@NumberOfNulls VARCHAR(10) OUTPUT, | |
@TotalTableRowCount VARCHAR(10) OUTPUT | |
) | |
AS | |
DECLARE @SQL NVARCHAR(MAX) | |
-- Get the total number of NULLs in the column | |
SET @SQL = 'SELECT @NumberOfNulls = COUNT(*) FROM ' + @FullyQualifiedTableName + ' WITH (nolock) | |
WHERE ' + @ColumnName + ' IS NULL' | |
-- Get the total number of rows in the column | |
SET @SQL += ' SELECT @TotalTableRowCount = COUNT(*) FROM ' + @FullyQualifiedTableName + ' WITH (nolock)' | |
EXEC sp_executesql @SQL, N'@NumberOfNulls INT OUTPUT, @TotalTableRowCount INT OUTPUT', | |
@NumberOfNulls = @NumberOfNulls OUTPUT, @TotalTableRowCount = @TotalTableRowCount OUTPUT | |
IF @TotalTableRowCount = 0 | |
BEGIN | |
-- Avoid divide by 0 error | |
SET @PercentOfColumnNull = 0.0 | |
END | |
ELSE | |
BEGIN | |
-- Get the percentage of column that is NULL | |
SET @SQL = 'SELECT @PercentOfColumnNull = | |
CAST(('+ @NumberOfNulls +'/ CAST('+ @TotalTableRowCount +' AS Numeric(13,1)))*100 AS Numeric(4,1))' | |
EXEC sp_executesql @sql, N'@PercentOfColumnNull VARCHAR(MAX) OUTPUT', @PercentOfColumnNull OUTPUT | |
END | |
GO | |
DECLARE @DatabaseName VARCHAR(100) | |
,@SchemaName VARCHAR(100) | |
,@TableName VARCHAR(100) | |
,@ColumnName VARCHAR(100) | |
,@FullyQualifiedTableName VARCHAR(500) | |
,@PercentOfColumnNull VARCHAR(10) | |
,@NumberOfNulls VARCHAR(10) | |
,@TotalTableRowCount VARCHAR(10) | |
,@TotalNumberOfColumns VARCHAR(20) | |
,@CurrentColumnOutOfTotal VARCHAR(20) = 0 | |
--Create Temp Table to Save Results | |
DROP TABLE IF EXISTS #Results | |
CREATE TABLE #Results ( | |
DatabaseName VARCHAR(100) | |
,SchemaName VARCHAR(100) | |
,TableName VARCHAR(100) | |
,ColumnName VARCHAR(100) | |
,NumberOfNulls INT | |
,TotalTableRowCount INT | |
,PercentOfColumnNull NUMERIC(4,1) | |
) | |
-- Since sparse columns can't be index columns, create a table to store full names and check if it's an index column. | |
DROP TABLE IF EXISTS #IndexColumns | |
SELECT '['+ OBJECT_SCHEMA_NAME(object_id) + '].['+ OBJECT_NAME(object_id) +'].[' + COL_NAME(object_id, column_id) +']' [FullColumn], | |
OBJECT_SCHEMA_NAME(object_id) AS SchemaName, | |
OBJECT_NAME(object_id) AS TableName, | |
COL_NAME(object_id, column_id) AS ColumnName | |
INTO #IndexColumns | |
FROM sys.index_columns | |
-- Get all database columns with null values to loop through | |
DROP TABLE IF EXISTS #DBColumnsToLoopThrough | |
SELECT C.Table_CataLog | |
,C.Table_Schema | |
,C.Table_Name | |
,C.Column_Name | |
,'[' + C.Table_CataLog + ']' + '.[' + C.Table_Schema + '].' | |
+ '[' + C.Table_Name + ']' AS FullQualifiedTableName | |
INTO #DBColumnsToLoopThrough | |
FROM information_schema.Columns C | |
INNER JOIN information_Schema.Tables T ON C.Table_Name = T.Table_Name | |
AND T.Table_Type = 'BASE TABLE' | |
AND C.IS_Nullable='YES' | |
-- Exclude columns that have max character lengths of -1. The size of these cannot be reduced | |
AND (C.CHARACTER_MAXIMUM_LENGTH <> -1 OR C.CHARACTER_MAXIMUM_LENGTH IS NULL) | |
-- Exclude columns that have rules or defaults | |
AND OBJECTPROPERTY(OBJECT_ID('[' + C.Table_CataLog + ']' + '.[' + C.Table_Schema + '].' | |
+ '[' + C.Table_Name + ']'), 'IsRule') = 0 | |
AND C.COLUMN_DEFAULT IS NULL | |
LEFT JOIN #IndexColumns | |
ON C.COLUMN_NAME = #IndexColumns.ColumnName | |
AND C.TABLE_NAME = #IndexColumns.TableName | |
AND C.TABLE_SCHEMA = #IndexColumns.SchemaName | |
WHERE #IndexColumns.ColumnName IS NULL | |
DROP TABLE IF EXISTS #IndexColumns | |
SET @TotalNumberOfColumns = (SELECT COUNT(*) FROM #DBColumnsToLoopThrough) | |
-- Create a cursor to loop through all the tables | |
DECLARE Cur CURSOR FAST_FORWARD | |
FOR SELECT * FROM #DBColumnsToLoopThrough | |
OPEN Cur | |
FETCH NEXT | |
FROM Cur | |
INTO @DatabaseName | |
,@SchemaName | |
,@TableName | |
,@ColumnName | |
,@FullyQualifiedTableName | |
-- Start looping through the tables | |
WHILE @@FETCH_STATUS = 0 --Remains 0 while FETCH statement is successful and continues loop | |
BEGIN | |
EXEC #sp_GetPercentOfColumnNull @FullyQualifiedTableName, @ColumnName, @PercentOfColumnNull OUTPUT, @NumberOfNulls OUTPUT, @TotalTableRowCount OUTPUT | |
INSERT INTO #Results | |
SELECT | |
@DatabaseName AS DatabaseName, | |
@SchemaName AS SchemaName, | |
@TableName AS TableName, | |
@ColumnName AS ColumnName, | |
@NumberOfNulls As NumberOfNulls, | |
@TotalTableRowCount AS TotalTableRowCount, | |
@PercentOfColumnNull AS PercentOfColumnNull | |
-- Display the task progress in messages | |
SET @CurrentColumnOutOfTotal += 1 | |
DECLARE @PercentageDone FLOAT(2) = ISNULL(CAST(@CurrentColumnOutOfTotal AS FLOAT(2))/NULLIF(CAST(@TotalNumberOfColumns AS FLOAT(2)), 0), 0) * 100 | |
PRINT @FullyQualifiedTableName + '.' + @ColumnName | |
PRINT @CurrentColumnOutOfTotal+'/'+@TotalNumberOfColumns+' ('+CAST(@PercentageDone AS VARCHAR(10))+'%)' | |
FETCH NEXT | |
FROM Cur | |
INTO @DatabaseName | |
,@SchemaName | |
,@TableName | |
,@ColumnName | |
,@FullyQualifiedTableName | |
END | |
CLOSE Cur | |
DEALLOCATE Cur | |
DROP PROCEDURE IF EXISTS #sp_GetPercentOfColumnNull | |
DROP TABLE IF EXISTS #DBColumnsToLoopThrough | |
-- STEP #2 | |
------Get all columns that qualify to be sparse columns-------- | |
-- Set minimum percentages of columns that qualify them to be "sparse columns" (Provided by Microsoft) | |
DROP TABLE IF EXISTS #PercentageLimits | |
CREATE TABLE #PercentageLimits ( | |
DataType VARCHAR(100), | |
Percents INT | |
) | |
INSERT INTO #PercentageLimits | |
VALUES | |
('varchar', 60), ('nvarchar', 60), ('datetime', 52), ('int', 64), ('float', 52), | |
('money', 52), ('time', 69), ('bit', 98), ('tinyint', 86), ('smallint', 76), | |
('bigint', 52), ('real', 64), ('smallmoney', 64), ('smalldatetime', 64), | |
('uniqueidentifier', 43), ('date', 69), ('datetime2', 57), | |
('datetimetoffset', 52), ('vardecimal', 60), ('varbinary', 60), | |
('xml', 60), ('hierarchyid', 60), ('char', 60), ('nchar', 60), ('binary', 60), | |
('numeric', 60), ('decimal', 60) | |
--Create a final temp table to save only columns that qualify as potential sparse columns | |
DECLARE @wantToFindSparseColumns BIT = 0 | |
DROP TABLE IF EXISTS #PotentialSparseColumns | |
SELECT #Results.*, #PercentageLimits.Percents [PercentageLimit], info.DATA_TYPE [DataType], info.CHARACTER_MAXIMUM_LENGTH [MaxCharacterLength], | |
COLUMNPROPERTY(OBJECT_ID('['+#Results.SchemaName+'].['+#Results.TableName+']'), #Results.ColumnName, 'IsSparse') [IsSparceColumn] | |
INTO #PotentialSparseColumns | |
FROM #Results | |
-- Join the column info table to check columns' data types | |
INNER JOIN INFORMATION_SCHEMA.COLUMNS info | |
ON #Results.SchemaName = info.TABLE_SCHEMA AND #Results.TableName = info.TABLE_NAME AND #Results.ColumnName = info.COLUMN_NAME | |
-- Only include columns with data types in our percentage limits for sparse columns table | |
RIGHT JOIN #PercentageLimits | |
ON info.DATA_TYPE = #PercentageLimits.DataType | |
WHERE | |
-- Only get columns that aren't already sparse | |
COLUMNPROPERTY(OBJECT_ID('['+#Results.SchemaName+'].['+#Results.TableName+']'), #Results.ColumnName, 'IsSparse') = @wantToFindSparseColumns | |
-- Only get columns where the full column names match and NULL values take up more than the column type's limit in percentages | |
AND PercentOfColumnNull >= ISNULL((SELECT Percents FROM #PercentageLimits WHERE DataType = info.Data_TYPE), CASE WHEN @wantToFindSparseColumns = 0 THEN 100 ELSE 0 END) | |
SELECT * FROM #PotentialSparseColumns | |
-- STEP #3 | |
--------------------CHANGE TO SPARSE COLUMNS---------------------- | |
-- DECLARE @DatabaseName VARCHAR(100) | |
-- ,@SchemaName VARCHAR(100) | |
-- ,@TableName VARCHAR(100) | |
-- ,@ColumnName VARCHAR(100) | |
-- ,@ErrorMessage NVARCHAR(4000) | |
-- ,@ErrorSeverity INT | |
-- ,@ErrorState INT | |
-- -- Create another cursor to loop through our potential sparse columns table and change all of them | |
-- DECLARE SparseCursor CURSOR FAST_FORWARD -- FAST_FORWARD cuts time by around 75% | |
-- FOR SELECT DatabaseName, SchemaName, TableName, ColumnName FROM #PotentialSparseColumns | |
-- OPEN SparseCursor | |
-- FETCH NEXT | |
-- FROM SparseCursor | |
-- INTO @DatabaseName, | |
-- @SchemaName, | |
-- @TableName, | |
-- @ColumnName | |
-- DECLARE @SQL VARCHAR(MAX) | |
-- DECLARE @FullTableName VARCHAR(500) | |
-- WHILE @@FETCH_STATUS = 0 | |
-- BEGIN | |
-- SET @FullTableName = '[' + @DatabaseName + '].[' + @SchemaName + '].[' + @TableName + ']' | |
-- SET @SQL = 'ALTER TABLE ' + @FullTableName + ' ALTER COLUMN [' + @ColumnName + '] ADD SPARSE' | |
-- PRINT @SQL | |
-- BEGIN TRY | |
-- EXEC(@SQL) | |
-- SET @SQL = 'ALTER INDEX ALL ON [' + @DatabaseName + '].[' + @SchemaName + '].[' + @TableName + '] REBUILD' | |
-- END TRY | |
-- BEGIN CATCH | |
-- -- Throw errors but still move on to the next column name in our --table. | |
-- SET @ErrorMessage = ERROR_MESSAGE() | |
-- SET @ErrorSeverity = ERROR_SEVERITY() | |
-- SET @ErrorState = ERROR_STATE() | |
-- RAISERROR(@ErrorMessage, @ErrorSeverity, @ErrorState) | |
-- END CATCH | |
-- FETCH NEXT | |
-- FROM SparseCursor | |
-- INTO @DatabaseName, | |
-- @SchemaName, | |
-- @TableName, | |
-- @ColumnName | |
-- END | |
-- CLOSE SparseCursor | |
-- DEALLOCATE SparseCursor | |
-- -- Create one final cursor to rebuild the indexes of each of the tables | |
-- DECLARE IndexCursor CURSOR FAST_FORWARD | |
-- FOR SELECT DISTINCT DatabaseName, SchemaName, TableName FROM #PotentialSparseColumns | |
-- OPEN IndexCursor | |
-- FETCH NEXT | |
-- FROM IndexCursor | |
-- INTO @DatabaseName, | |
-- @SchemaName, | |
-- @TableName | |
-- WHILE @@FETCH_STATUS = 0 | |
-- BEGIN | |
-- BEGIN TRY | |
-- SET @SQL = 'ALTER INDEX ALL ON [' + @DatabaseName + '].[' + @SchemaName + '].[' + @TableName + '] REBUILD' | |
-- PRINT @SQL | |
-- EXEC(@SQL) | |
-- END TRY | |
-- BEGIN CATCH | |
-- -- Throw errors but still move on to the next column name in our --table. | |
-- SET @ErrorMessage = ERROR_MESSAGE() | |
-- SET @ErrorSeverity = ERROR_SEVERITY() | |
-- SET @ErrorState = ERROR_STATE() | |
-- RAISERROR(@ErrorMessage, @ErrorSeverity, @ErrorState) | |
-- END CATCH | |
-- FETCH NEXT | |
-- FROM IndexCursor | |
-- INTO @DatabaseName, | |
-- @SchemaName, | |
-- @TableName | |
-- END | |
-- CLOSE IndexCursor | |
-- DEALLOCATE IndexCursor | |
-- | |
--DROP TABLE IF EXISTS #PercentageLimits | |
--DROP TABLE IF EXISTS #Results | |
--DROP TABLE IF EXISTS #PotentialSparseColumns |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment