Skip to content

Instantly share code, notes, and snippets.

@LitKnd
Created January 25, 2018 18:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save LitKnd/025086682932cb3666e260b5b7499df0 to your computer and use it in GitHub Desktop.
Save LitKnd/025086682932cb3666e260b5b7499df0 to your computer and use it in GitHub Desktop.
Kendra-Little_When-Partitioning-Indexes-Hurts-Performance_20171017
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
Setup:
Download BabbyNames.bak.zip (41 MB zipped database backup)
https://github.com/LitKnd/BabbyNames/releases/tag/v1.1
Then review and run the script below on a SQL Server 2017 dedicated test instance
Developer Edition recommended (Enteprise and Evaluation Editions will work too)
The script
Restores the database (edit the file locations for your instance)
Expands and modifies the data
21GB data files (multiple files in a couple of filegroups)
3GB log file
Duration on my test instance (4 vCPUs, SSDs): ~28 minutes
This requires some tempdb space to do sorts, etc. I ran with 4x 1GB tempdb files.
*****************************************************************************/
SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
GO
EXEC sp_configure 'show advanced options', 1;
GO
RECONFIGURE
GO
EXEC sp_configure 'max degree of parallelism', 4;
GO
EXEC sp_configure 'cost threshold for parallelism', 100
GO
EXEC sp_configure 'max server memory (MB)', 7000;
GO
RECONFIGURE
GO
/****************************************************
Restore small BabbyNames database
****************************************************/
use master;
GO
IF DB_ID('BabbyNames201711') IS NOT NULL
BEGIN
IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE'
ALTER DATABASE BabbyNames201711
SET SINGLE_USER
WITH ROLLBACK IMMEDIATE;
END
GO
RESTORE DATABASE BabbyNames201711
FROM DISK=N'S:\MSSQL\Backup\BabbyNames.bak'
WITH
MOVE 'BabbyNames' TO 'S:\MSSQL\DATA\BabbyNames201711.mdf',
MOVE 'BabbyNames_log' TO 'S:\MSSQL\DATA\BabbyNames201711_log.ldf',
REPLACE,
RECOVERY;
GO
ALTER DATABASE BabbyNames201711 SET RECOVERY SIMPLE;
GO
ALTER DATABASE BabbyNames201711
ADD FILEGROUP fg_FirstNameByBirthDate;
GO
ALTER DATABASE BabbyNames201711 ADD FILE
( NAME = fg_FirstNameByBirthDate_f1,
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf',
SIZE = 5GB,
FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO
ALTER DATABASE BabbyNames201711 ADD FILE
( NAME = fg_FirstNameByBirthDate_f2,
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf',
SIZE = 5GB,
FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO
ALTER DATABASE BabbyNames201711 ADD FILE
( NAME = fg_FirstNameByBirthDate_f3,
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf',
SIZE = 5GB,
FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO
ALTER DATABASE BabbyNames201711 ADD FILE
( NAME = fg_FirstNameByBirthDate_f4,
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf',
SIZE = 5GB,
FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO
/* Make fg_FirstNameByBirthDate the default filegroup */
ALTER DATABASE BabbyNames201711 MODIFY FILEGROUP fg_FirstNameByBirthDate DEFAULT;
GO
ALTER DATABASE BabbyNames201711 MODIFY FILE (NAME='BabbyNames_log', SIZE=3GB, FILEGROWTH=512MB);
GO
/* just in case you want to play around with Hekaton... */
ALTER DATABASE BabbyNames201711 ADD FILEGROUP MemoryOptimizedData CONTAINS MEMORY_OPTIMIZED_DATA;
GO
ALTER DATABASE BabbyNames201711
ADD FILE( NAME = 'MemoryOptimizedData' , FILENAME = 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData')
TO FILEGROUP MemoryOptimizedData;
GO
/****************************************************
Configure database and expand data
****************************************************/
SET STATISTICS IO, TIME OFF;
GO
SET XACT_ABORT, NOCOUNT ON;
GO
SET ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING, ANSI_WARNINGS, ARITHABORT, CONCAT_NULL_YIELDS_NULL ON;
GO
SET NUMERIC_ROUNDABORT OFF;
GO
ALTER DATABASE BabbyNames201711 SET QUERY_STORE = ON
GO
ALTER DATABASE BabbyNames201711 SET QUERY_STORE
(OPERATION_MODE = READ_WRITE,
CLEANUP_POLICY = (STALE_QUERY_THRESHOLD_DAYS = 365), MAX_STORAGE_SIZE_MB = 1024
)
GO
ALTER DATABASE BabbyNames201711 SET COMPATIBILITY_LEVEL=140;
GO
ALTER DATABASE BabbyNames201711 SET TARGET_RECOVERY_TIME = 60 SECONDS;
GO
USE BabbyNames201711;
GO
EXEC evt.logme N'Restored small BabbyNames201711 database';
GO
ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
GO
ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
GO
/******************************************************/
/* ref.Numbers */
/******************************************************/
/* Create ref.Numbers. This is a helper "numbers" table just to help us in the next step.*/
IF SCHEMA_ID('ref') IS NULL
BEGIN
EXEC evt.logme N'Create schema ref.';
EXEC ('CREATE SCHEMA ref AUTHORIZATION dbo');
END
GO
EXEC evt.logme N'Create ref.Numbers.';
GO
IF OBJECT_ID('ref.Numbers','U') IS NOT NULL
BEGIN
EXEC evt.logme N'Table ref.Numbers already exists, dropping.';
DROP TABLE ref.Numbers;
END
GO
CREATE TABLE ref.Numbers (
Num INT NOT NULL,
) on fg_FirstNameByBirthDate;
GO
EXEC evt.logme N'Load ref.Numbers.';
GO
INSERT ref.Numbers
(Num)
SELECT TOP 10000000
ROW_NUMBER() OVER (ORDER BY fn1.ReportYear)
FROM agg.FirstNameByYear AS fn1
CROSS JOIN agg.FirstNameByYear AS fn2;
GO
EXEC evt.logme N'Index and key ref.Numbers.';
GO
CREATE CLUSTERED COLUMNSTORE INDEX ccx_ref_Numbers on ref.Numbers;
GO
ALTER TABLE ref.Numbers
ADD CONSTRAINT pk_refNumbers_Num
PRIMARY KEY NONCLUSTERED (Num)
ON fg_FirstNameByBirthDate;
GO
/******************************************************/
/* Helper index */
/******************************************************/
EXEC evt.logme N'Create nccx_halp ON agg.FirstNameByYear.';
GO
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_halp on agg.FirstNameByYear
(FirstNameId, ReportYear, NameCount, Gender)
ON fg_FirstNameByBirthDate
GO
/******************************************************/
/* Create and load dbo.FirstNameByBirthDate */
/******************************************************/
EXEC evt.logme N'Load dbo.FirstNameByBirthDateStage';
GO
/* This select into gets parallel insert plan, 20170906.
We set the default filegroup above so this will go into fg_FirstNameByBirthDate,
I am just using the hint added in 2017 for future proofing. */
SELECT
ISNULL(CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 'boop')) AS BIGINT),0) AS FirstNameByBirthDateId,
DATEADD(mi,n.Num * 5.1,CAST('1/1/' + CAST(ReportYear AS CHAR(4)) AS datetime2(0))) as FakeBirthDateStamp,
fn.StateCode,
fn.FirstNameId,
Gender,
CAST(NULL AS TINYINT) as Flag1,
CAST(NULL AS CHAR(1)) as Flag2
INTO dbo.FirstNameByBirthDateStage ON fg_FirstNameByBirthDate /* This added in SQL Server 2017*/
FROM agg.FirstNameByYearState AS fn
CROSS APPLY (select Num from ref.Numbers where Num <= fn.NameCount) AS n
WHERE fn.ReportYear >= 1970 /* Limit size of dataset here */
OPTION (RECOMPILE);
GO
EXEC evt.logme N'Add BirthYear computed column to dbo.FirstNameByBirthDateStage';
GO
ALTER TABLE dbo.FirstNameByBirthDateStage
ADD BirthYear as YEAR(FakeBirthDateStamp);
GO
EXEC evt.logme N'Create dbo.FirstNameByBirthDate, which has an identity property';
GO
CREATE TABLE dbo.FirstNameByBirthDate (
FirstNameByBirthDateId BIGINT IDENTITY(1,1),
FakeBirthDateStamp DATETIME2(0),
StateCode CHAR(2) NOT NULL,
FirstNameId INT NOT NULL,
Gender CHAR(1) NOT NULL,
Flag1 TINYINT NULL,
Flag2 CHAR(1) NULL,
BirthYear AS YEAR(FakeBirthDateStamp)
) ON fg_FirstNameByBirthDate
GO
EXEC evt.logme N'Switch data from dbo.FirstNameByBirthDateStage to dbo.FirstNameByBirthDate';
GO
ALTER TABLE dbo.FirstNameByBirthDateStage SWITCH TO dbo.FirstNameByBirthDate;
GO
EXEC evt.logme N'DBCC CHECKIDENT RESEED for dbo.FirstNameByBirthDateStage';
GO
DBCC CHECKIDENT ('dbo.FirstNameByBirthDate', RESEED);
GO
EXEC evt.logme N'DROP TABLE FirstNameByBirthDateStage';
GO
DROP TABLE FirstNameByBirthDateStage;
GO
EXEC evt.logme N'Create clustered PK on dbo.FirstNameByBirthDate';
GO
ALTER TABLE dbo.FirstNameByBirthDate
ADD CONSTRAINT pk_FirstNameByBirthDate_FirstNameByBirthDateId
PRIMARY KEY CLUSTERED (FirstNameByBirthDateId)
WITH (SORT_IN_TEMPDB = ON,
DATA_COMPRESSION = ROW)
ON fg_FirstNameByBirthDate;
GO
EXEC evt.logme N'Clean up nccx_halp ON agg.FirstNameByYear.';
GO
DROP INDEX IF EXISTS nccx_halp ON agg.FirstNameByYear;
GO
/******************************************************/
/* Partitioned table */
/******************************************************/
EXEC evt.logme N'CREATE SCHEMA pt AUTHORIZATION dbo';
GO
CREATE SCHEMA pt AUTHORIZATION dbo;
GO
EXEC evt.logme N'CREATE PARTITION FUNCTION pf_fnbd from 1879 to 2020.';
GO
declare @dsql nvarchar(max)=N'
CREATE PARTITION FUNCTION pf_fnbd (DATETIME2(0))
AS RANGE RIGHT
FOR VALUES ( ' + (
SELECT STUFF(
(SELECT ', ''' + CAST(Num as CHAR(4)) + '-01-01'''
FROM ref.Numbers
WHERE Num between 1959 and 2020
FOR XML PATH(''))
,1,1,'')
)+N'
);'
EXEC (@dsql);
GO
EXEC evt.logme N'CREATE PARTITION SCHEME ps_fnbd.';
GO
CREATE PARTITION SCHEME ps_fnbd
AS PARTITION pf_fnbd
ALL TO (fg_FirstNameByBirthDate);
GO
EXEC evt.logme N'CREATE table pt.FirstNameByBirthDate';
GO
CREATE TABLE pt.FirstNameByBirthDate (
FakeBirthDateStamp DATETIME2(0) NOT NULL,
FirstNameByBirthDateId BIGINT IDENTITY NOT NULL,
StateCode CHAR(2) NOT NULL,
FirstNameId INT NULL,
Gender CHAR(1) NULL,
Flag1 TINYINT NULL,
Flag2 CHAR(1) NULL,
BirthYear AS YEAR(FakeBirthDateStamp)
) ON ps_fnbd (FakeBirthDateStamp)
GO
EXEC evt.logme N'Insert data from dbo.FirstNameByBirthDate';
GO
SET IDENTITY_INSERT pt.FirstNameByBirthDate ON;
GO
INSERT pt.FirstNameByBirthDate WITH (TABLOCK)
(FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2)
SELECT FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2
FROM dbo.FirstNameByBirthDate
LEFT JOIN ref.Numbers as n on 1 = -1 /* pointless join to get batch mode on sort operator */
;
GO
SET IDENTITY_INSERT pt.FirstNameByBirthDate OFF;
GO
EXEC evt.logme N'CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate';
GO
CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate ON
pt.FirstNameByBirthDate (FirstNameByBirthDateId, FakeBirthDateStamp)
WITH (SORT_IN_TEMPDB = ON,
DATA_COMPRESSION = ROW)
ON ps_fnbd (FakeBirthDateStamp);
GO
EXEC evt.logme N'DBCC CHECKIDENT RESEED for pt.FirstNameByBirthDateStage';
GO
DBCC CHECKIDENT ('pt.FirstNameByBirthDate', RESEED);
GO
/******************************************************/
/* Foreign key constraints */
/******************************************************/
EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_FirstNameId';
GO
ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId)
REFERENCES ref.FirstName (FirstNameId);
GO
EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_FirstNameId';
GO
ALTER TABLE pt.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_pt_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId)
REFERENCES ref.FirstName (FirstNameId);
GO
EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_StateCode';
GO
ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode)
REFERENCES ref.State (StateCode);
GO
EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_StateCode';
GO
ALTER TABLE pt.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_pt_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode)
REFERENCES ref.State (StateCode);
GO
/******************************************************/
/* Indexes */
/******************************************************/
CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_BirthYear
ON dbo.FirstNameByBirthDate ( BirthYear ASC );
GO
CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_BirthYear
ON pt.FirstNameByBirthDate ( BirthYear ASC );
GO
CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_FirstNameId
ON dbo.FirstNameByBirthDate ( FirstNameId ASC );
GO
CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_FirstNameId
ON pt.FirstNameByBirthDate ( FirstNameId ASC );
GO
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_pt_FirstNameByBirthDate
ON pt.FirstNameByBirthDate�
( FakeBirthDateStamp, FirstNameByBirthDateId, StateCode, FirstNameId, Gender, Flag1, Flag2);
GO
/******************************************************/
/* Backup */
/******************************************************/
EXEC evt.logme N'Run a full backup';
GO
BACKUP DATABASE BabbyNames201711
TO DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
WITH INIT, COMPRESSION, STATS=5;
GO
/* Restore command (for reference) */
--use master;
--GO
--IF DB_ID('BabbyNames201711') IS NOT NULL
--BEGIN
-- ALTER DATABASE BabbyNames201711
-- SET SINGLE_USER
-- WITH ROLLBACK IMMEDIATE;
--END
--GO
--RESTORE DATABASE BabbyNames201711
-- FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
-- WITH
-- MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf',
-- MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf',
-- REPLACE,
-- RECOVERY;
--GO
/******************************************************/
/* All done */
/******************************************************/
EXEC evt.logme N'BEEP BOOP WE ARE DONE';
GO
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
The script restores the database created and configured in 00-populate-BabbyNames201711.sql
If you've just run 00-populate-BabbyNames201711.sql, you don't have to run this.
If you want to reset your environment without rerunning 00-populate-BabbyNames201711.sql,
modify the file locations here to restore the database.
*****************************************************************************/
SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
GO
EXEC sp_configure 'show advanced options', 1;
GO
RECONFIGURE
GO
EXEC sp_configure 'max degree of parallelism', 4;
GO
EXEC sp_configure 'cost threshold for parallelism', 100
GO
EXEC sp_configure 'max server memory (MB)', 7000;
GO
RECONFIGURE
GO
/****************************************************
Restore database
****************************************************/
use master;
GO
IF DB_ID('BabbyNames201711') IS NOT NULL
BEGIN
IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE'
ALTER DATABASE BabbyNames201711
SET SINGLE_USER
WITH ROLLBACK IMMEDIATE;
END
GO
/* What can I say, I like to carefully lay out my files.*/
RESTORE DATABASE BabbyNames201711
FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
WITH
MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf',
MOVE 'fg_FirstNameByBirthDate_f1' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf',
MOVE 'fg_FirstNameByBirthDate_f2' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf',
MOVE 'fg_FirstNameByBirthDate_f3' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf',
MOVE 'fg_FirstNameByBirthDate_f4' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf',
MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf',
MOVE 'MemoryOptimizedData' TO 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData',
REPLACE,
RECOVERY;
GO
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/
RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO
USE BabbyNames201711;
GO
SET NOCOUNT ON;
GO
/********************************************************/
/* Problem Query: The aggregator */
/********************************************************/
/* Run these with actual plans enabled.
Compare the estimated cost (CTP is set at 100)
Compare the query time stats
Show why the columnstore index is drunk here.
*/
SELECT
BirthYear,
COUNT(*) as NameCount
FROM dbo.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC;
GO
SELECT
BirthYear,
COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC;
GO
/* OK, let's just not use the columnstore.
Let's use the partitioned rowstore index on BirthYear.
In my version/compat level, I don't get batch mode with this hint.
Are we getting partition elimination or segment elimination?
*/
SET STATISTICS IO, TIME ON;
GO
SELECT
BirthYear,
COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX);
GO
SET STATISTICS IO, TIME OFF;
GO
/* It's still slower!
Example runtimes...
Non-partitioned rowstore index on BirthYear: CPU time = 5,157 ms, elapsed time = 1,662 ms.
Partitioned rowstore index on BirthYear: CPU time = 10,547 ms, elapsed time = 2,939 ms.
Partitioned columnstore index re-computing BirthYear: CPU time = 17,563 ms, elapsed time = 18,094 ms.
*/
/* Head back to the slides to explain why the partitioned index is different */
/*************************************************************************
Fixes
*************************************************************************/
/***************************
FIX 1 - NONALIGNED INDEX
***************************/
/* We can create a "non-aligned", non-partitioned index on our partitioned
table. Just specify a filegroup rather than a partition scheme */
/* I'm giving it a short (terrible) name just to make it easy to identify in the execution plan */
/* This takes ~2.5 minutes */
CREATE INDEX nonaligned
on pt.FirstNameByBirthDate (BirthYear)
WITH (SORT_IN_TEMPDB = ON)
ON [fg_FirstNameByBirthDate];
GO
/* Does SQL Server want to use 'nonaligned'? */
SELECT
BirthYear,
COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
GO
SELECT
BirthYear,
COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX);
GO
/*
So in our case we have to add a hint (due to that pesky computed column!)
And we've lost the ability to do partition level operations:
switch any partition in
switch any partition out
truncate any partition
We have to drop or disable all non-aligned indexes to do ANY partition level operation.
*/
TRUNCATE TABLE pt.FirstNameByBirthDate
WITH (PARTITIONS (1 TO 4));
GO
DROP INDEX IF EXISTS nonaligned
ON pt.FirstNameByBirthDate;
GO
/***************************
FIX 1 - NOT SO GREAT.
***************************/
/***************************
FIX 2 - QUERY REWRITE TO GET
PARTION ELIMINATION
***************************/
/* We can rewrite the query to get partition elimination.
We're still going to have to do the hash aggregate, but we'll
do it for fewer partitions */
/* Run with actual plans on.
Look at how many actual partitions were used in the plan.
Confirm the number of rowgroups read in stats io output*/
SET STATISTICS IO, TIME ON;
GO
SELECT
BirthYear,
COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE
FakeBirthDateStamp >= CAST('2001-01-01' AS DATETIME2(0)) and
FakeBirthDateStamp < CAST('2016-01-01' AS DATETIME2(0))
GROUP BY BirthYear
ORDER BY COUNT(*) DESC;
GO
SET STATISTICS IO, TIME OFF;
GO
/* It's still slower! Look at the plan to see why...
Rewritten query / partitioned columnstore index: CPU time = 5,734 ms, elapsed time = 5,894 ms.
Non-partitioned rowstore index on BirthYear: CPU time = 5,157 ms, elapsed time = 1,662 ms.
Partitioned rowstore index on BirthYear: CPU time = 10,547 ms, elapsed time = 2,939 ms.
Partitioned columnstore index re-computing BirthYear: CPU time = 17,563 ms, elapsed time = 18,094 ms.
*/
/* We're leveraging the partitioning key now, so we are getting partition elimination.
However, this is still getting costed very low, and it's going single threaded.
It seems to be under-costing the work that compute scalar's doing to re-calc BirthYear to me.
*/
/* We could try to force parallelism using undocumented TF 8649.
Or on SQL Server 2016 SP1 CU2+ and SQL Server 2017, we could use undocumented hint ENABLE_PARALLEL_PLAN_PREFERENCE
http://www.queryprocessor.com/how-to-get-a-parallel-plan/ (Dmitry Pilugin)
*/
/* Notice that it's not in this list... */
SELECT * FROM sys.dm_exec_valid_use_hints;
GO
SET STATISTICS IO, TIME ON;
GO
SELECT
BirthYear,
COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE
FakeBirthDateStamp >= CAST('2001-01-01' AS DATETIME2(0)) and
FakeBirthDateStamp < CAST('2016-01-01' AS DATETIME2(0))
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
OPTION (USE HINT ('ENABLE_PARALLEL_PLAN_PREFERENCE'))
GO
SET STATISTICS IO, TIME OFF;
GO
/* It's still slower! Look at the plan to see why...
Rewritten query / partitioned columnstore index: CPU time = 5,734 ms, elapsed time = 5,894 ms.
Rewritten query with hint forcing parallelism: CPU time = 7,188 ms, elapsed time = 1,907 ms.
Non-partitioned rowstore index on BirthYear: CPU time = 5,157 ms, elapsed time = 1,662 ms.
Partitioned rowstore index on BirthYear: CPU time = 10,547 ms, elapsed time = 2,939 ms.
Partitioned columnstore index re-computing BirthYear: CPU time = 17,563 ms, elapsed time = 18,094 ms.
*/
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/
RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO
USE BabbyNames201711;
GO
/********************************************************/
/* Problem: MIN / MAX get slow */
/********************************************************/
/* Run these with actual plans enabled.
Note the differences between the plans.
It knows the query against the partitioned table will be more expensive.
Look at elapsed time in each plan.
Query 1: look at the seek operator and explain why that was fast.
Query 2: How many partitions were actually used?
*/
/* Why is the second query slower? */
SET STATISTICS IO, TIME ON;
GO
SELECT MAX(FirstNameId) AS max_val
FROM dbo.FirstNameByBirthDate
GO
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate with (index (pt_FirstNameByBirthDate_FirstNameId))
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX)
GO
EXEC sp_helpindex 'pt.FirstNameByBirthDate'
SET STATISTICS IO, TIME OFF;
GO
/*
Non-partitioned rowstore index on FirstNameId: CPU time = 0 ms, elapsed time = 0 ms.
Partitioned rowstore index on FirstNameId: CPU time = 26,530 ms, elapsed time = 8,110 ms.
*/
/* MIN has the same issue (although of course a different resulting value) */
SELECT MIN(FirstNameId) AS min_val
FROM pt.FirstNameByBirthDate
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX)
GO
/* To the slides, to talk this through! */
/*************************************************************************
Fixes
*************************************************************************/
/***************************
FIX 1 - Non-Aligned Index
***************************/
/* We can create a "non-aligned", non-partitioned index on our partitioned
table. Just specify a filegroup rather than a partition scheme */
/* This takes ~2 minutes to create */
CREATE INDEX nonaligned
on pt.FirstNameByBirthDate (FirstNameId)
WITH (SORT_IN_TEMPDB = ON)
ON [fg_FirstNameByBirthDate];
GO
/* Now we get the "non-partitioned" plan .... */
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate;
GO
/* But we've lost the ability to do partition level operations:
switch any partition in
switch any partition out
truncate any partition
We have to drop or disable all non-aligned indexes to do ANY partition level operation.
*/
TRUNCATE TABLE pt.FirstNameByBirthDate
WITH (PARTITIONS (1 TO 4));
GO
DROP INDEX IF EXISTS nonaligned
ON pt.FirstNameByBirthDate;
GO
/***************************
FIX 2 - Query rewrite
***************************/
/* OK, let's practice the recommended workaround from the Connect Item */
/* Our solution relies on the built-in $partition function.
This computes which partition data is in.
Here's a simple example using it for just one row. */
SELECT $partition.pf_fnbd(FakeBirthDateStamp) as partition_number,
FakeBirthDateStamp,
FirstNameByBirthDateId,
BirthYear,
FirstNameId,
Gender
FROM pt.FirstNameByBirthDate
WHERE FirstNameByBirthDateId = 12225838
GO
/* Testing out the pattern in
https://connect.microsoft.com/SQLServer/feedback/details/240968/partition-table-using-min-max-functions-and-top-n-index-selection-and-performance */
/* We're just looking at partition #s 41, 42, 43 here */
/* We can use the function in interesting ways.
Look at the plan for this query... it does a very efficient backward scan in each partition */
SELECT MAX(max_val)
FROM
( VALUES (41), (42), (43) ) as partitiontable(num) /* 3 row table from table value constructor */
CROSS APPLY
(SELECT MAX(FirstNameId) as max_val
FROM pt.FirstNameByBirthDate
/* CROSS APPLY lets us join to the table value constructor in here */
WHERE $partition.pf_fnbd(FakeBirthDateStamp) = partitiontable.num
) AS o;
GO
/* What if we have a changing number of partitions over time? */
/* We can use the partition function "fanout" and a numbers
table to construct a table with one row for each partition number, like this */
SELECT n.Num
FROM sys.partition_functions AS pf
JOIN ref.Numbers as n on n.Num <= pf.fanout
WHERE
pf.name='pf_fnbd';
GO
/* So we can get an automatic peek into each partition which has rows using this... */
/* Note: joining to a system table prevents parallelism, so I'm doing this as a two-step
query and putting the value for @fanout into a variable */
SET STATISTICS IO, TIME ON;
GO
DECLARE @fanout int
SELECT @fanout = fanout
FROM sys.partition_functions pf
WHERE pf.name='pf_fnbd';
SELECT MAX(max_val)AS max_val
FROM (
SELECT Num
FROM ref.Numbers
WHERE Num <= @fanout
) as partitiontable(num)
CROSS APPLY
(select MAX(FirstNameId) as max_val
from pt.FirstNameByBirthDate
where $partition.pf_fnbd(FakeBirthDateStamp) = partitiontable.num
) as o;
GO
SET STATISTICS IO, TIME OFF;
GO
/*
Rewritten query for partitioned index on FirstNameId CPU time = 31 ms, elapsed time = 8 ms.
Non-partitioned rowstore index on FirstNameId: CPU time = 0 ms, elapsed time = 0 ms.
Partitioned rowstore index on FirstNameId: CPU time = 26,530 ms, elapsed time = 8,110 ms.
*/
/* The TSQL is not pretty.
Don't like it?
Vote up the bug!
https://connect.microsoft.com/SQLServer/feedback/details/240968/partition-table-using-min-max-functions-and-top-n-index-selection-and-performance
*/
/***************************
FIX 3 - Columnstore
***************************/
SET STATISTICS IO, TIME ON;
GO
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate;
GO
SET STATISTICS IO, TIME OFF;
GO
/* Enable actual plan.
Look at "Locally Aggregated Rows" on properties of scan.
Bonus: look at the number of rows coming out of the Columnstore Index Scan.
Talk about why that makes sense (it does!)
*/
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate;
GO
/*
Partitioned columnstore CPU time = 406 ms, elapsed time = 407 ms.
Rewritten query for partitioned index on FirstNameId CPU time = 31 ms, elapsed time = 8 ms.
Non-partitioned rowstore index on FirstNameId: CPU time = 0 ms, elapsed time = 0 ms.
Partitioned rowstore index on FirstNameId: CPU time = 26,530 ms, elapsed time = 8,110 ms.
*/
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/
RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO
/********************************************************/
/* Problem: Unexpected blocking */
/********************************************************/
/* Start this query in another session.
This will take out a lock on the partition holding 2015 data */
USE BabbyNames201711;
GO
BEGIN TRAN
DECLARE @updateval DATETIME2(0)='2010-01-01 03:49:00'
UPDATE pt.FirstNameByBirthDate
SET Flag1 = 1
WHERE FakeBirthDateStamp=@updateval
and FirstNameId = 67092;
/* This query should only need to read from one partition.
Start it up in this session. */
USE BabbyNames201711;
GO
SELECT COUNT(*)
FROM pt.FirstNameByBirthDate
WHERE FakeBirthDateStamp >= '2000-01-01 00:00:00.0'
AND FakeBirthDateStamp < '2000-01-02 00:00:00.0';
GO
/* Confirm that it is blocked in a third session.
sp_WhoIsActive is a free procedure written by Adam Machanic.
Get it at WhoIsActive.com
Why is it blocked?
*/
exec sp_WhoIsActive @get_plans=1;
GO
/* Look at the predicate on the columnstore operator in the plan
There's a convert_implicit
And there's @1 and @1
Cancel the blocked query
*/
/* This is a simple query, so SQL Server is automatically parameterizing those dates.
It sees it needs to make them a DATETIME2, but it defaults to making them DATETIME2(7)
The column in the table is a DATETIME2(0).
DATETIME2(7) has a higher precedence than DATETIME2(0)
SQL Server has to up-sample EVERY SINGLE ROW in the table to DATETIME2(7) to compare values.
*/
/* Get an estimated plan for this corrected query.
Look at the seek predicate which magically appeared.
Then run the query and look at the actual plan /
actual partitions accessed. */
SELECT COUNT(*)
FROM pt.FirstNameByBirthDate
WHERE FakeBirthDateStamp > CAST('2000-01-01 00:00:00.0' AS DATETIME2(0))
AND FakeBirthDateStamp <= CAST('2000-01-02 00:00:00.0' AS DATETIME2(0))
GO
/* This also works fine */
DECLARE @FakeBirthDateStampStart DATETIME2(0) = '2000-01-01 00:00:00.0',
@FakeBirthDateStampEnd DATETIME2(0) = '2000-01-02 00:00:00.0';
SELECT COUNT(*)
FROM pt.FirstNameByBirthDate
WHERE FakeBirthDateStamp > @FakeBirthDateStampStart
AND FakeBirthDateStamp <= @FakeBirthDateStampEnd
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment