Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Run this to get a list of columnstore index partitions to determine which should be reorganized. This script also generates index reorg statements which could be run.
-------------------------------------------------------------------
-- Script Name: ColumnstoreReorg.sql
--
-- Desc: Run this to get a list of columnstore index partitions
-- to determine which should be reorganized. The script also
-- generates index reorg statements which could be run.
--
-- Notes: SQL Server 2016 is REQUIRED. SQL Server 2016 changed
-- what index reorganization does to columnstore indexes,
-- and this script takes advantage of those changes.
--
-- Auth: Kevin Feasel
-- Date: 2016-03-21
--
-- Change History
-- --------------
-- Date - Auth: YYYY-MM-DD - Author
-- Description: DESCRIPTION
--------------------------------------------------------------------
--Helpful articles:
--https://blogs.msdn.microsoft.com/sqlserverstorageengine/2016/03/07/columnstore-index-defragmentation-using-reorganize-command/
--https://blogs.msdn.microsoft.com/sqlserverstorageengine/2016/03/08/columnstore-index-merge-policy-for-reorganize/
DECLARE
@ColumnstoreRowMax INT = 1048576,
--This is set to 900K instead of 1048576 because there are scenarios where row group N-1 might have 910K+ rows and group N ~100K. Smashing
--them together would lead to more than 1048576 rows, so Reorganize's behavior is to pack row group N-1 most of the way but leave enough in N
--to make sense.
@ColumnstorePracticalRowMax INT = 900000,
--How many rows we see in the Delta Store before deciding we want to compress this rowgroup (without waiting for the tuple mover to do its job).
@MaxDeltaStoreRows INT = 1048576,
@MaxPartitionsBeforeBeingOld INT = 3,
@MaxDeletedRowPercent INT = 15,
--How many rules should fail before we give the signal to reorganize a partition. 0 = show all partitions.
@ReorganizeThreshold INT = 0;
WITH cspartitions AS
(
SELECT
csrg.partition_number AS PartitionNumber,
COUNT(1) OVER (PARTITION BY s.name, t.name, i.name) AS NumberOfPartitions,
t.name AS TableName,
s.name AS SchemaName,
i.name AS IndexName,
DENSE_RANK() OVER (ORDER BY csrg.partition_number DESC) AS PartitionOrderDesc,
SUM(calc.ActiveRowGroup) AS RowGroups,
SUM(calc.CompressedRowGroup) AS CompressedRowGroups,
SUM(calc.NotCompressedRowGroup) AS NotCompressedRowGroups,
SUM(calc.DeltaStoreRowGroup) AS DeltaStoreRowGroups,
SUM(calc.DeltaStoreRows) AS DeltaStoreRows,
CASE
WHEN SUM(calc.DeltaStoreRowGroup) = 0 THEN 0
ELSE 1.0 * SUM(calc.DeltaStoreRows) / SUM(calc.DeltaStoreRowGroup)
END AS AverageDeltaStoreRows,
CASE
WHEN SUM(calc.CompressedRows) = 0 THEN 0.00
ELSE 100.0 * SUM(calc.DeletedRows) / SUM(calc.CompressedRows)
END AS PercentCompressedRowsDeleted,
MAX(CASE WHEN calc.CompressedRowGroup = 1 THEN 100.0 * calc.DeletedRows / calc.CompressedRows ELSE 0 END) AS MaxPercentCompressedRowsDeleted,
SUM(CASE WHEN calc.ActiveRows >= @ColumnstorePracticalRowMax THEN 1 ELSE 0 END) AS RowGroupsFull,
AVG(calc.ActiveRows) AS AverageRowGroupRows,
AVG(calc.ActiveRows - calc.DeletedRows) AS AverageActiveRowGroupRows,
CASE
WHEN SUM(calc.NonFilledActiveRowGroupRows) = 0 THEN 0
ELSE SUM(calc.NonDeletedNonFilledActiveRowGroups) / SUM(calc.NonFilledActiveRowGroups)
END AS AverageNonFilledActiveRowGroupRows
FROM sys.column_store_row_groups csrg
INNER JOIN sys.indexes i
ON csrg.object_id = i.object_id
AND csrg.index_id = i.index_id
INNER JOIN sys.partitions p
ON csrg.partition_number = p.partition_number
AND csrg.object_id = p.object_id
AND csrg.index_id = p.index_id
INNER JOIN sys.tables t
ON csrg.object_id = t.object_id
INNER JOIN sys.schemas s
ON t.schema_id = s.schema_id
CROSS APPLY
(
SELECT
CASE WHEN csrg.state <> 4 THEN 1 ELSE 0 END AS ActiveRowGroup, --Ignore TOMBSTONE partitions; they'll be removed automatically.
CASE WHEN csrg.state <> 4 THEN csrg.total_rows ELSE 0 END AS ActiveRows, --Ignore TOMBSTONE partitions; they'll be removed automatically.
CASE WHEN csrg.state = 1 THEN 1 ELSE 0 END AS DeltaStoreRowGroup,
CASE WHEN csrg.state = 1 THEN csrg.total_rows ELSE 0 END AS DeltaStoreRows,
CASE WHEN csrg.state = 2 THEN 1 ELSE 0 END AS ClosedRowGroup,
CASE WHEN csrg.state IN (1,2) THEN 1 ELSE 0 END AS NotCompressedRowGroup,
CASE WHEN csrg.state = 3 THEN 1 ELSE 0 END AS CompressedRowGroup,
CASE WHEN csrg.state = 3 THEN csrg.total_rows ELSE 0 END AS CompressedRows,
CASE WHEN csrg.state = 3 THEN deleted_rows ELSE 0 END AS DeletedRows,
CASE WHEN csrg.state = 3 AND csrg.total_rows < @ColumnstoreRowMax THEN total_rows - deleted_rows ELSE 0 END AS NonDeletedNonFilledActiveRowGroups,
CASE WHEN csrg.state = 3 AND csrg.total_rows < @ColumnstoreRowMax THEN 1 ELSE 0 END AS NonFilledActiveRowGroups,
CASE WHEN csrg.state = 3 AND csrg.total_rows < @ColumnstoreRowMax THEN total_rows ELSE 0 END AS NonFilledActiveRowGroupRows
) calc
GROUP BY
csrg.partition_number,
t.name,
s.name,
i.name
),
rules AS
(
SELECT
p.TableName,
p.SchemaName,
p.IndexName,
p.PartitionNumber,
p.NumberOfPartitions,
p.PartitionOrderDesc,
p.RowGroups,
p.CompressedRowGroups,
p.NotCompressedRowGroups,
p.DeltaStoreRowGroups,
p.DeltaStoreRows,
p.AverageDeltaStoreRows,
p.PercentCompressedRowsDeleted,
p.MaxPercentCompressedRowsDeleted,
p.RowGroupsFull,
p.AverageRowGroupRows,
p.AverageActiveRowGroupRows,
p.AverageNonFilledActiveRowGroupRows,
epc.ExpectedRowGroupCollapse,
--Following are the rules which help us determine whether to Reorganize the partition:
--If the average number of rows per row group is high enough, it appears that expected collapse doesn't actually take place, so we'd needlessly reorg over and over.
CASE WHEN epc.ExpectedRowGroupCollapse > 1 AND p.AverageNonFilledActiveRowGroupRows < (@ColumnstoreRowMax / 2) THEN 1 ELSE 0 END AS PartitionCompactionPossible,
CASE WHEN p.MaxPercentCompressedRowsDeleted > @MaxDeletedRowPercent THEN 1 ELSE 0 END AS DeletedRowCleanupViable,
CASE WHEN p.PartitionOrderDesc > @MaxPartitionsBeforeBeingOld AND p.NotCompressedRowGroups > 0 THEN 1 ELSE 0 END AS OldPartitionCompressionPossible,
CASE WHEN p.DeltaStoreRows > @MaxDeltaStoreRows THEN 1 ELSE 0 END AS DeltaStoreCompressionPossible
FROM cspartitions p
CROSS APPLY
(
SELECT
--If a partition is already full, there's no space to reduce partition count further.
p.RowGroups - p.RowGroupsFull AS NonFullPartitions
) calc
CROSS APPLY
(
SELECT
--Determine the minimum number of partitions we would need to have for this number of rows, ignoring any non-compressed or full row groups.
--EX: given 3 partitions: MAX 300K 300K. 300K+300K = 600K which fits into one partition. Expected Row Group Collapse --> 2-1 = 1
--EX: given 3 partitions: MAX 600K 600K. 600K+600K = 1.2M which fits into 2 partitions. Expected Row Group Collapse --> 2-2 = 0
CAST(calc.NonFullPartitions - (1.0 * p.AverageNonFilledActiveRowGroupRows * calc.NonFullPartitions / @ColumnstoreRowMax) AS INT) AS ExpectedRowGroupCollapse
) epc
)
SELECT
CONCAT(r.SchemaName, '.', r.TableName, '.', r.IndexName) AS IndexName,
r.PartitionNumber,
r.RowGroups,
r.CompressedRowGroups,
r.NotCompressedRowGroups,
r.DeltaStoreRowGroups,
r.RowGroupsFull,
res.ReorganizePartition,
mrs.ReorganizeStatement,
r.DeltaStoreRows,
r.AverageDeltaStoreRows,
r.PercentCompressedRowsDeleted,
r.MaxPercentCompressedRowsDeleted,
r.AverageRowGroupRows,
r.AverageActiveRowGroupRows,
r.AverageNonFilledActiveRowGroupRows,
CASE
WHEN r.PartitionCompactionPossible = 1 THEN r.ExpectedRowGroupCollapse
ELSE 0
END AS ExpectedRowGroupCollapse,
r.ExpectedRowGroupCollapse AS PotentialRowGroupCollapse,
r.PartitionCompactionPossible,
r.DeletedRowCleanupViable,
r.OldPartitionCompressionPossible,
r.DeltaStoreCompressionPossible
FROM rules r
CROSS APPLY
(
SELECT
r.PartitionCompactionPossible + r.DeletedRowCleanupViable + r.OldPartitionCompressionPossible + r.DeltaStoreCompressionPossible AS ReorganizePartition
) res
OUTER APPLY
(
SELECT
CONCAT(N'ALTER INDEX ', QUOTENAME(r.IndexName), ' ON ', QUOTENAME(r.SchemaName), N'.', QUOTENAME(r.TableName), N' REORGANIZE ') AS AlterTableName,
CASE WHEN r.NumberOfPartitions > 1 THEN CONCAT(N'PARTITION = ', r.PartitionNumber) ELSE N'' END AS PartitionNumber
) calc
OUTER APPLY
(
SELECT
CONCAT(calc.AlterTableName, calc.PartitionNumber, N'WITH (COMPRESS_ALL_ROW_GROUPS = ON); ') AS ReorganizeStatement
WHERE
r.OldPartitionCompressionPossible = 1
OR r.DeltaStoreCompressionPossible = 1
) ors
OUTER APPLY
(
SELECT
CONCAT(ors.ReorganizeStatement, calc.AlterTableName, calc.PartitionNumber) AS ReorganizeStatement
) mrs
WHERE
res.ReorganizePartition >= @ReorganizeThreshold
ORDER BY
r.PartitionOrderDesc;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment