Skip to content

Instantly share code, notes, and snippets.

@chris-mcdonald-dev
Last active March 3, 2022 21:29
Show Gist options
  • Save chris-mcdonald-dev/ba32653a61386b0675b0d8e61d2d56ad to your computer and use it in GitHub Desktop.
Save chris-mcdonald-dev/ba32653a61386b0675b0d8e61d2d56ad to your computer and use it in GitHub Desktop.
Finds columns that qualify for storage optimizations of at least 40% across entire database
-- WHAT THIS DOES:
-- By default, NULL values take up storage as well.
-- Microsoft recommends considering a feature called "sparse columns" that reduces the size of NULL values in that column.
-- This only works well for columns with a large amount of NULL values. This script does this process for you.
-- DIRECTIONS ON HOW TO USE:
-- 1. Simply run the script initially to view all columns in your database that Microsoft recommends could benefit from being a "sparse column"
-- 2. Lastly, uncomment "Step #3", and run just that portion to actually make the changes to all columns that qualify.
--- NOTES:
-- To find Sparse Columns that no longer have enough NULL values, change the following
-- "PercentOfColumnNull >=" --------> "PercentOfColumnNull <="
-- "@wantToFindSparseColumns BIT = 0" --------> "@wantToFindSparseColumns BIT = 1"
-- STEP #1
-----------Find all columns in DB that have NULL values and list the percentage of NULL values in each---------
-- Create Temporary Procedure to get what percentage of the column NULL values are taking up.
DROP PROCEDURE IF EXISTS #sp_GetPercentOfColumnNull
GO
CREATE PROCEDURE #sp_GetPercentOfColumnNull (
@FullyQualifiedTableName VARCHAR(500),
@ColumnName VARCHAR(100),
@PercentOfColumnNull VARCHAR(10) OUTPUT,
@NumberOfNulls VARCHAR(10) OUTPUT,
@TotalTableRowCount VARCHAR(10) OUTPUT
)
AS
DECLARE @SQL NVARCHAR(MAX)
-- Get the total number of NULLs in the column
SET @SQL = 'SELECT @NumberOfNulls = COUNT(*) FROM ' + @FullyQualifiedTableName + ' WITH (nolock)
WHERE ' + @ColumnName + ' IS NULL'
-- Get the total number of rows in the column
SET @SQL += ' SELECT @TotalTableRowCount = COUNT(*) FROM ' + @FullyQualifiedTableName + ' WITH (nolock)'
EXEC sp_executesql @SQL, N'@NumberOfNulls INT OUTPUT, @TotalTableRowCount INT OUTPUT',
@NumberOfNulls = @NumberOfNulls OUTPUT, @TotalTableRowCount = @TotalTableRowCount OUTPUT
IF @TotalTableRowCount = 0
BEGIN
-- Avoid divide by 0 error
SET @PercentOfColumnNull = 0.0
END
ELSE
BEGIN
-- Get the percentage of column that is NULL
SET @SQL = 'SELECT @PercentOfColumnNull =
CAST(('+ @NumberOfNulls +'/ CAST('+ @TotalTableRowCount +' AS Numeric(13,1)))*100 AS Numeric(4,1))'
EXEC sp_executesql @sql, N'@PercentOfColumnNull VARCHAR(MAX) OUTPUT', @PercentOfColumnNull OUTPUT
END
GO
DECLARE @DatabaseName VARCHAR(100)
,@SchemaName VARCHAR(100)
,@TableName VARCHAR(100)
,@ColumnName VARCHAR(100)
,@FullyQualifiedTableName VARCHAR(500)
,@PercentOfColumnNull VARCHAR(10)
,@NumberOfNulls VARCHAR(10)
,@TotalTableRowCount VARCHAR(10)
,@TotalNumberOfColumns VARCHAR(20)
,@CurrentColumnOutOfTotal VARCHAR(20) = 0
--Create Temp Table to Save Results
DROP TABLE IF EXISTS #Results
CREATE TABLE #Results (
DatabaseName VARCHAR(100)
,SchemaName VARCHAR(100)
,TableName VARCHAR(100)
,ColumnName VARCHAR(100)
,NumberOfNulls INT
,TotalTableRowCount INT
,PercentOfColumnNull NUMERIC(4,1)
)
-- Since sparse columns can't be index columns, create a table to store full names and check if it's an index column.
DROP TABLE IF EXISTS #IndexColumns
SELECT '['+ OBJECT_SCHEMA_NAME(object_id) + '].['+ OBJECT_NAME(object_id) +'].[' + COL_NAME(object_id, column_id) +']' [FullColumn],
OBJECT_SCHEMA_NAME(object_id) AS SchemaName,
OBJECT_NAME(object_id) AS TableName,
COL_NAME(object_id, column_id) AS ColumnName
INTO #IndexColumns
FROM sys.index_columns
-- Get all database columns with null values to loop through
DROP TABLE IF EXISTS #DBColumnsToLoopThrough
SELECT C.Table_CataLog
,C.Table_Schema
,C.Table_Name
,C.Column_Name
,'[' + C.Table_CataLog + ']' + '.[' + C.Table_Schema + '].'
+ '[' + C.Table_Name + ']' AS FullQualifiedTableName
INTO #DBColumnsToLoopThrough
FROM information_schema.Columns C
INNER JOIN information_Schema.Tables T ON C.Table_Name = T.Table_Name
AND T.Table_Type = 'BASE TABLE'
AND C.IS_Nullable='YES'
-- Exclude columns that have max character lengths of -1. The size of these cannot be reduced
AND (C.CHARACTER_MAXIMUM_LENGTH <> -1 OR C.CHARACTER_MAXIMUM_LENGTH IS NULL)
-- Exclude columns that have rules or defaults
AND OBJECTPROPERTY(OBJECT_ID('[' + C.Table_CataLog + ']' + '.[' + C.Table_Schema + '].'
+ '[' + C.Table_Name + ']'), 'IsRule') = 0
AND C.COLUMN_DEFAULT IS NULL
LEFT JOIN #IndexColumns
ON C.COLUMN_NAME = #IndexColumns.ColumnName
AND C.TABLE_NAME = #IndexColumns.TableName
AND C.TABLE_SCHEMA = #IndexColumns.SchemaName
WHERE #IndexColumns.ColumnName IS NULL
DROP TABLE IF EXISTS #IndexColumns
SET @TotalNumberOfColumns = (SELECT COUNT(*) FROM #DBColumnsToLoopThrough)
-- Create a cursor to loop through all the tables
DECLARE Cur CURSOR FAST_FORWARD
FOR SELECT * FROM #DBColumnsToLoopThrough
OPEN Cur
FETCH NEXT
FROM Cur
INTO @DatabaseName
,@SchemaName
,@TableName
,@ColumnName
,@FullyQualifiedTableName
-- Start looping through the tables
WHILE @@FETCH_STATUS = 0 --Remains 0 while FETCH statement is successful and continues loop
BEGIN
EXEC #sp_GetPercentOfColumnNull @FullyQualifiedTableName, @ColumnName, @PercentOfColumnNull OUTPUT, @NumberOfNulls OUTPUT, @TotalTableRowCount OUTPUT
INSERT INTO #Results
SELECT
@DatabaseName AS DatabaseName,
@SchemaName AS SchemaName,
@TableName AS TableName,
@ColumnName AS ColumnName,
@NumberOfNulls As NumberOfNulls,
@TotalTableRowCount AS TotalTableRowCount,
@PercentOfColumnNull AS PercentOfColumnNull
-- Display the task progress in messages
SET @CurrentColumnOutOfTotal += 1
DECLARE @PercentageDone FLOAT(2) = ISNULL(CAST(@CurrentColumnOutOfTotal AS FLOAT(2))/NULLIF(CAST(@TotalNumberOfColumns AS FLOAT(2)), 0), 0) * 100
PRINT @FullyQualifiedTableName + '.' + @ColumnName
PRINT @CurrentColumnOutOfTotal+'/'+@TotalNumberOfColumns+' ('+CAST(@PercentageDone AS VARCHAR(10))+'%)'
FETCH NEXT
FROM Cur
INTO @DatabaseName
,@SchemaName
,@TableName
,@ColumnName
,@FullyQualifiedTableName
END
CLOSE Cur
DEALLOCATE Cur
DROP PROCEDURE IF EXISTS #sp_GetPercentOfColumnNull
DROP TABLE IF EXISTS #DBColumnsToLoopThrough
-- STEP #2
------Get all columns that qualify to be sparse columns--------
-- Set minimum percentages of columns that qualify them to be "sparse columns" (Provided by Microsoft)
DROP TABLE IF EXISTS #PercentageLimits
CREATE TABLE #PercentageLimits (
DataType VARCHAR(100),
Percents INT
)
INSERT INTO #PercentageLimits
VALUES
('varchar', 60), ('nvarchar', 60), ('datetime', 52), ('int', 64), ('float', 52),
('money', 52), ('time', 69), ('bit', 98), ('tinyint', 86), ('smallint', 76),
('bigint', 52), ('real', 64), ('smallmoney', 64), ('smalldatetime', 64),
('uniqueidentifier', 43), ('date', 69), ('datetime2', 57),
('datetimetoffset', 52), ('vardecimal', 60), ('varbinary', 60),
('xml', 60), ('hierarchyid', 60), ('char', 60), ('nchar', 60), ('binary', 60),
('numeric', 60), ('decimal', 60)
--Create a final temp table to save only columns that qualify as potential sparse columns
DECLARE @wantToFindSparseColumns BIT = 0
DROP TABLE IF EXISTS #PotentialSparseColumns
SELECT #Results.*, #PercentageLimits.Percents [PercentageLimit], info.DATA_TYPE [DataType], info.CHARACTER_MAXIMUM_LENGTH [MaxCharacterLength],
COLUMNPROPERTY(OBJECT_ID('['+#Results.SchemaName+'].['+#Results.TableName+']'), #Results.ColumnName, 'IsSparse') [IsSparceColumn]
INTO #PotentialSparseColumns
FROM #Results
-- Join the column info table to check columns' data types
INNER JOIN INFORMATION_SCHEMA.COLUMNS info
ON #Results.SchemaName = info.TABLE_SCHEMA AND #Results.TableName = info.TABLE_NAME AND #Results.ColumnName = info.COLUMN_NAME
-- Only include columns with data types in our percentage limits for sparse columns table
RIGHT JOIN #PercentageLimits
ON info.DATA_TYPE = #PercentageLimits.DataType
WHERE
-- Only get columns that aren't already sparse
COLUMNPROPERTY(OBJECT_ID('['+#Results.SchemaName+'].['+#Results.TableName+']'), #Results.ColumnName, 'IsSparse') = @wantToFindSparseColumns
-- Only get columns where the full column names match and NULL values take up more than the column type's limit in percentages
AND PercentOfColumnNull >= ISNULL((SELECT Percents FROM #PercentageLimits WHERE DataType = info.Data_TYPE), CASE WHEN @wantToFindSparseColumns = 0 THEN 100 ELSE 0 END)
SELECT * FROM #PotentialSparseColumns
-- STEP #3
--------------------CHANGE TO SPARSE COLUMNS----------------------
-- DECLARE @DatabaseName VARCHAR(100)
-- ,@SchemaName VARCHAR(100)
-- ,@TableName VARCHAR(100)
-- ,@ColumnName VARCHAR(100)
-- ,@ErrorMessage NVARCHAR(4000)
-- ,@ErrorSeverity INT
-- ,@ErrorState INT
-- -- Create another cursor to loop through our potential sparse columns table and change all of them
-- DECLARE SparseCursor CURSOR FAST_FORWARD -- FAST_FORWARD cuts time by around 75%
-- FOR SELECT DatabaseName, SchemaName, TableName, ColumnName FROM #PotentialSparseColumns
-- OPEN SparseCursor
-- FETCH NEXT
-- FROM SparseCursor
-- INTO @DatabaseName,
-- @SchemaName,
-- @TableName,
-- @ColumnName
-- DECLARE @SQL VARCHAR(MAX)
-- DECLARE @FullTableName VARCHAR(500)
-- WHILE @@FETCH_STATUS = 0
-- BEGIN
-- SET @FullTableName = '[' + @DatabaseName + '].[' + @SchemaName + '].[' + @TableName + ']'
-- SET @SQL = 'ALTER TABLE ' + @FullTableName + ' ALTER COLUMN [' + @ColumnName + '] ADD SPARSE'
-- PRINT @SQL
-- BEGIN TRY
-- EXEC(@SQL)
-- SET @SQL = 'ALTER INDEX ALL ON [' + @DatabaseName + '].[' + @SchemaName + '].[' + @TableName + '] REBUILD'
-- END TRY
-- BEGIN CATCH
-- -- Throw errors but still move on to the next column name in our --table.
-- SET @ErrorMessage = ERROR_MESSAGE()
-- SET @ErrorSeverity = ERROR_SEVERITY()
-- SET @ErrorState = ERROR_STATE()
-- RAISERROR(@ErrorMessage, @ErrorSeverity, @ErrorState)
-- END CATCH
-- FETCH NEXT
-- FROM SparseCursor
-- INTO @DatabaseName,
-- @SchemaName,
-- @TableName,
-- @ColumnName
-- END
-- CLOSE SparseCursor
-- DEALLOCATE SparseCursor
-- -- Create one final cursor to rebuild the indexes of each of the tables
-- DECLARE IndexCursor CURSOR FAST_FORWARD
-- FOR SELECT DISTINCT DatabaseName, SchemaName, TableName FROM #PotentialSparseColumns
-- OPEN IndexCursor
-- FETCH NEXT
-- FROM IndexCursor
-- INTO @DatabaseName,
-- @SchemaName,
-- @TableName
-- WHILE @@FETCH_STATUS = 0
-- BEGIN
-- BEGIN TRY
-- SET @SQL = 'ALTER INDEX ALL ON [' + @DatabaseName + '].[' + @SchemaName + '].[' + @TableName + '] REBUILD'
-- PRINT @SQL
-- EXEC(@SQL)
-- END TRY
-- BEGIN CATCH
-- -- Throw errors but still move on to the next column name in our --table.
-- SET @ErrorMessage = ERROR_MESSAGE()
-- SET @ErrorSeverity = ERROR_SEVERITY()
-- SET @ErrorState = ERROR_STATE()
-- RAISERROR(@ErrorMessage, @ErrorSeverity, @ErrorState)
-- END CATCH
-- FETCH NEXT
-- FROM IndexCursor
-- INTO @DatabaseName,
-- @SchemaName,
-- @TableName
-- END
-- CLOSE IndexCursor
-- DEALLOCATE IndexCursor
--
--DROP TABLE IF EXISTS #PercentageLimits
--DROP TABLE IF EXISTS #Results
--DROP TABLE IF EXISTS #PotentialSparseColumns
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment