Run this to get a list of columnstore index partitions to determine which should be reorganized. This script also generates index reorg statements which could be run.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
------------------------------------------------------------------- | |
-- Script Name: ColumnstoreReorg.sql | |
-- | |
-- Desc: Run this to get a list of columnstore index partitions | |
-- to determine which should be reorganized. The script also | |
-- generates index reorg statements which could be run. | |
-- | |
-- Notes: SQL Server 2016 is REQUIRED. SQL Server 2016 changed | |
-- what index reorganization does to columnstore indexes, | |
-- and this script takes advantage of those changes. | |
-- | |
-- Auth: Kevin Feasel | |
-- Date: 2016-03-21 | |
-- | |
-- Change History | |
-- -------------- | |
-- Date - Auth: YYYY-MM-DD - Author | |
-- Description: DESCRIPTION | |
-------------------------------------------------------------------- | |
--Helpful articles: | |
--https://blogs.msdn.microsoft.com/sqlserverstorageengine/2016/03/07/columnstore-index-defragmentation-using-reorganize-command/ | |
--https://blogs.msdn.microsoft.com/sqlserverstorageengine/2016/03/08/columnstore-index-merge-policy-for-reorganize/ | |
DECLARE | |
@ColumnstoreRowMax INT = 1048576, | |
--This is set to 900K instead of 1048576 because there are scenarios where row group N-1 might have 910K+ rows and group N ~100K. Smashing | |
--them together would lead to more than 1048576 rows, so Reorganize's behavior is to pack row group N-1 most of the way but leave enough in N | |
--to make sense. | |
@ColumnstorePracticalRowMax INT = 900000, | |
--How many rows we see in the Delta Store before deciding we want to compress this rowgroup (without waiting for the tuple mover to do its job). | |
@MaxDeltaStoreRows INT = 1048576, | |
@MaxPartitionsBeforeBeingOld INT = 3, | |
@MaxDeletedRowPercent INT = 15, | |
--How many rules should fail before we give the signal to reorganize a partition. 0 = show all partitions. | |
@ReorganizeThreshold INT = 0; | |
WITH cspartitions AS | |
( | |
SELECT | |
csrg.partition_number AS PartitionNumber, | |
COUNT(1) OVER (PARTITION BY s.name, t.name, i.name) AS NumberOfPartitions, | |
t.name AS TableName, | |
s.name AS SchemaName, | |
i.name AS IndexName, | |
DENSE_RANK() OVER (ORDER BY csrg.partition_number DESC) AS PartitionOrderDesc, | |
SUM(calc.ActiveRowGroup) AS RowGroups, | |
SUM(calc.CompressedRowGroup) AS CompressedRowGroups, | |
SUM(calc.NotCompressedRowGroup) AS NotCompressedRowGroups, | |
SUM(calc.DeltaStoreRowGroup) AS DeltaStoreRowGroups, | |
SUM(calc.DeltaStoreRows) AS DeltaStoreRows, | |
CASE | |
WHEN SUM(calc.DeltaStoreRowGroup) = 0 THEN 0 | |
ELSE 1.0 * SUM(calc.DeltaStoreRows) / SUM(calc.DeltaStoreRowGroup) | |
END AS AverageDeltaStoreRows, | |
CASE | |
WHEN SUM(calc.CompressedRows) = 0 THEN 0.00 | |
ELSE 100.0 * SUM(calc.DeletedRows) / SUM(calc.CompressedRows) | |
END AS PercentCompressedRowsDeleted, | |
MAX(CASE WHEN calc.CompressedRowGroup = 1 THEN 100.0 * calc.DeletedRows / calc.CompressedRows ELSE 0 END) AS MaxPercentCompressedRowsDeleted, | |
SUM(CASE WHEN calc.ActiveRows >= @ColumnstorePracticalRowMax THEN 1 ELSE 0 END) AS RowGroupsFull, | |
AVG(calc.ActiveRows) AS AverageRowGroupRows, | |
AVG(calc.ActiveRows - calc.DeletedRows) AS AverageActiveRowGroupRows, | |
CASE | |
WHEN SUM(calc.NonFilledActiveRowGroupRows) = 0 THEN 0 | |
ELSE SUM(calc.NonDeletedNonFilledActiveRowGroups) / SUM(calc.NonFilledActiveRowGroups) | |
END AS AverageNonFilledActiveRowGroupRows | |
FROM sys.column_store_row_groups csrg | |
INNER JOIN sys.indexes i | |
ON csrg.object_id = i.object_id | |
AND csrg.index_id = i.index_id | |
INNER JOIN sys.partitions p | |
ON csrg.partition_number = p.partition_number | |
AND csrg.object_id = p.object_id | |
AND csrg.index_id = p.index_id | |
INNER JOIN sys.tables t | |
ON csrg.object_id = t.object_id | |
INNER JOIN sys.schemas s | |
ON t.schema_id = s.schema_id | |
CROSS APPLY | |
( | |
SELECT | |
CASE WHEN csrg.state <> 4 THEN 1 ELSE 0 END AS ActiveRowGroup, --Ignore TOMBSTONE partitions; they'll be removed automatically. | |
CASE WHEN csrg.state <> 4 THEN csrg.total_rows ELSE 0 END AS ActiveRows, --Ignore TOMBSTONE partitions; they'll be removed automatically. | |
CASE WHEN csrg.state = 1 THEN 1 ELSE 0 END AS DeltaStoreRowGroup, | |
CASE WHEN csrg.state = 1 THEN csrg.total_rows ELSE 0 END AS DeltaStoreRows, | |
CASE WHEN csrg.state = 2 THEN 1 ELSE 0 END AS ClosedRowGroup, | |
CASE WHEN csrg.state IN (1,2) THEN 1 ELSE 0 END AS NotCompressedRowGroup, | |
CASE WHEN csrg.state = 3 THEN 1 ELSE 0 END AS CompressedRowGroup, | |
CASE WHEN csrg.state = 3 THEN csrg.total_rows ELSE 0 END AS CompressedRows, | |
CASE WHEN csrg.state = 3 THEN deleted_rows ELSE 0 END AS DeletedRows, | |
CASE WHEN csrg.state = 3 AND csrg.total_rows < @ColumnstoreRowMax THEN total_rows - deleted_rows ELSE 0 END AS NonDeletedNonFilledActiveRowGroups, | |
CASE WHEN csrg.state = 3 AND csrg.total_rows < @ColumnstoreRowMax THEN 1 ELSE 0 END AS NonFilledActiveRowGroups, | |
CASE WHEN csrg.state = 3 AND csrg.total_rows < @ColumnstoreRowMax THEN total_rows ELSE 0 END AS NonFilledActiveRowGroupRows | |
) calc | |
GROUP BY | |
csrg.partition_number, | |
t.name, | |
s.name, | |
i.name | |
), | |
rules AS | |
( | |
SELECT | |
p.TableName, | |
p.SchemaName, | |
p.IndexName, | |
p.PartitionNumber, | |
p.NumberOfPartitions, | |
p.PartitionOrderDesc, | |
p.RowGroups, | |
p.CompressedRowGroups, | |
p.NotCompressedRowGroups, | |
p.DeltaStoreRowGroups, | |
p.DeltaStoreRows, | |
p.AverageDeltaStoreRows, | |
p.PercentCompressedRowsDeleted, | |
p.MaxPercentCompressedRowsDeleted, | |
p.RowGroupsFull, | |
p.AverageRowGroupRows, | |
p.AverageActiveRowGroupRows, | |
p.AverageNonFilledActiveRowGroupRows, | |
epc.ExpectedRowGroupCollapse, | |
--Following are the rules which help us determine whether to Reorganize the partition: | |
--If the average number of rows per row group is high enough, it appears that expected collapse doesn't actually take place, so we'd needlessly reorg over and over. | |
CASE WHEN epc.ExpectedRowGroupCollapse > 1 AND p.AverageNonFilledActiveRowGroupRows < (@ColumnstoreRowMax / 2) THEN 1 ELSE 0 END AS PartitionCompactionPossible, | |
CASE WHEN p.MaxPercentCompressedRowsDeleted > @MaxDeletedRowPercent THEN 1 ELSE 0 END AS DeletedRowCleanupViable, | |
CASE WHEN p.PartitionOrderDesc > @MaxPartitionsBeforeBeingOld AND p.NotCompressedRowGroups > 0 THEN 1 ELSE 0 END AS OldPartitionCompressionPossible, | |
CASE WHEN p.DeltaStoreRows > @MaxDeltaStoreRows THEN 1 ELSE 0 END AS DeltaStoreCompressionPossible | |
FROM cspartitions p | |
CROSS APPLY | |
( | |
SELECT | |
--If a partition is already full, there's no space to reduce partition count further. | |
p.RowGroups - p.RowGroupsFull AS NonFullPartitions | |
) calc | |
CROSS APPLY | |
( | |
SELECT | |
--Determine the minimum number of partitions we would need to have for this number of rows, ignoring any non-compressed or full row groups. | |
--EX: given 3 partitions: MAX 300K 300K. 300K+300K = 600K which fits into one partition. Expected Row Group Collapse --> 2-1 = 1 | |
--EX: given 3 partitions: MAX 600K 600K. 600K+600K = 1.2M which fits into 2 partitions. Expected Row Group Collapse --> 2-2 = 0 | |
CAST(calc.NonFullPartitions - (1.0 * p.AverageNonFilledActiveRowGroupRows * calc.NonFullPartitions / @ColumnstoreRowMax) AS INT) AS ExpectedRowGroupCollapse | |
) epc | |
) | |
SELECT | |
CONCAT(r.SchemaName, '.', r.TableName, '.', r.IndexName) AS IndexName, | |
r.PartitionNumber, | |
r.RowGroups, | |
r.CompressedRowGroups, | |
r.NotCompressedRowGroups, | |
r.DeltaStoreRowGroups, | |
r.RowGroupsFull, | |
res.ReorganizePartition, | |
mrs.ReorganizeStatement, | |
r.DeltaStoreRows, | |
r.AverageDeltaStoreRows, | |
r.PercentCompressedRowsDeleted, | |
r.MaxPercentCompressedRowsDeleted, | |
r.AverageRowGroupRows, | |
r.AverageActiveRowGroupRows, | |
r.AverageNonFilledActiveRowGroupRows, | |
CASE | |
WHEN r.PartitionCompactionPossible = 1 THEN r.ExpectedRowGroupCollapse | |
ELSE 0 | |
END AS ExpectedRowGroupCollapse, | |
r.ExpectedRowGroupCollapse AS PotentialRowGroupCollapse, | |
r.PartitionCompactionPossible, | |
r.DeletedRowCleanupViable, | |
r.OldPartitionCompressionPossible, | |
r.DeltaStoreCompressionPossible | |
FROM rules r | |
CROSS APPLY | |
( | |
SELECT | |
r.PartitionCompactionPossible + r.DeletedRowCleanupViable + r.OldPartitionCompressionPossible + r.DeltaStoreCompressionPossible AS ReorganizePartition | |
) res | |
OUTER APPLY | |
( | |
SELECT | |
CONCAT(N'ALTER INDEX ', QUOTENAME(r.IndexName), ' ON ', QUOTENAME(r.SchemaName), N'.', QUOTENAME(r.TableName), N' REORGANIZE ') AS AlterTableName, | |
CASE WHEN r.NumberOfPartitions > 1 THEN CONCAT(N'PARTITION = ', r.PartitionNumber) ELSE N'' END AS PartitionNumber | |
) calc | |
OUTER APPLY | |
( | |
SELECT | |
CONCAT(calc.AlterTableName, calc.PartitionNumber, N'WITH (COMPRESS_ALL_ROW_GROUPS = ON); ') AS ReorganizeStatement | |
WHERE | |
r.OldPartitionCompressionPossible = 1 | |
OR r.DeltaStoreCompressionPossible = 1 | |
) ors | |
OUTER APPLY | |
( | |
SELECT | |
CONCAT(ors.ReorganizeStatement, calc.AlterTableName, calc.PartitionNumber) AS ReorganizeStatement | |
) mrs | |
WHERE | |
res.ReorganizePartition >= @ReorganizeThreshold | |
ORDER BY | |
r.PartitionOrderDesc; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment