-
-
Save LitKnd/025086682932cb3666e260b5b7499df0 to your computer and use it in GitHub Desktop.
Kendra-Little_When-Partitioning-Indexes-Hurts-Performance_20171017
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***************************************************************************** | |
Copyright (c) 2017 SQL Workbooks LLC | |
Terms of Use: https://sqlworkbooks.com/terms-of-service/ | |
Contact: help@sqlworkbooks.com | |
Setup: | |
Download BabbyNames.bak.zip (41 MB zipped database backup) | |
https://github.com/LitKnd/BabbyNames/releases/tag/v1.1 | |
Then review and run the script below on a SQL Server 2017 dedicated test instance | |
Developer Edition recommended (Enteprise and Evaluation Editions will work too) | |
The script | |
Restores the database (edit the file locations for your instance) | |
Expands and modifies the data | |
21GB data files (multiple files in a couple of filegroups) | |
3GB log file | |
Duration on my test instance (4 vCPUs, SSDs): ~28 minutes | |
This requires some tempdb space to do sorts, etc. I ran with 4x 1GB tempdb files. | |
*****************************************************************************/ | |
SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON; | |
GO | |
EXEC sp_configure 'show advanced options', 1; | |
GO | |
RECONFIGURE | |
GO | |
EXEC sp_configure 'max degree of parallelism', 4; | |
GO | |
EXEC sp_configure 'cost threshold for parallelism', 100 | |
GO | |
EXEC sp_configure 'max server memory (MB)', 7000; | |
GO | |
RECONFIGURE | |
GO | |
/**************************************************** | |
Restore small BabbyNames database | |
****************************************************/ | |
use master; | |
GO | |
IF DB_ID('BabbyNames201711') IS NOT NULL | |
BEGIN | |
IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE' | |
ALTER DATABASE BabbyNames201711 | |
SET SINGLE_USER | |
WITH ROLLBACK IMMEDIATE; | |
END | |
GO | |
RESTORE DATABASE BabbyNames201711 | |
FROM DISK=N'S:\MSSQL\Backup\BabbyNames.bak' | |
WITH | |
MOVE 'BabbyNames' TO 'S:\MSSQL\DATA\BabbyNames201711.mdf', | |
MOVE 'BabbyNames_log' TO 'S:\MSSQL\DATA\BabbyNames201711_log.ldf', | |
REPLACE, | |
RECOVERY; | |
GO | |
ALTER DATABASE BabbyNames201711 SET RECOVERY SIMPLE; | |
GO | |
ALTER DATABASE BabbyNames201711 | |
ADD FILEGROUP fg_FirstNameByBirthDate; | |
GO | |
ALTER DATABASE BabbyNames201711 ADD FILE | |
( NAME = fg_FirstNameByBirthDate_f1, | |
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf', | |
SIZE = 5GB, | |
FILEGROWTH = 512MB | |
) TO FILEGROUP fg_FirstNameByBirthDate; | |
GO | |
ALTER DATABASE BabbyNames201711 ADD FILE | |
( NAME = fg_FirstNameByBirthDate_f2, | |
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf', | |
SIZE = 5GB, | |
FILEGROWTH = 512MB | |
) TO FILEGROUP fg_FirstNameByBirthDate; | |
GO | |
ALTER DATABASE BabbyNames201711 ADD FILE | |
( NAME = fg_FirstNameByBirthDate_f3, | |
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf', | |
SIZE = 5GB, | |
FILEGROWTH = 512MB | |
) TO FILEGROUP fg_FirstNameByBirthDate; | |
GO | |
ALTER DATABASE BabbyNames201711 ADD FILE | |
( NAME = fg_FirstNameByBirthDate_f4, | |
FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf', | |
SIZE = 5GB, | |
FILEGROWTH = 512MB | |
) TO FILEGROUP fg_FirstNameByBirthDate; | |
GO | |
/* Make fg_FirstNameByBirthDate the default filegroup */ | |
ALTER DATABASE BabbyNames201711 MODIFY FILEGROUP fg_FirstNameByBirthDate DEFAULT; | |
GO | |
ALTER DATABASE BabbyNames201711 MODIFY FILE (NAME='BabbyNames_log', SIZE=3GB, FILEGROWTH=512MB); | |
GO | |
/* just in case you want to play around with Hekaton... */ | |
ALTER DATABASE BabbyNames201711 ADD FILEGROUP MemoryOptimizedData CONTAINS MEMORY_OPTIMIZED_DATA; | |
GO | |
ALTER DATABASE BabbyNames201711 | |
ADD FILE( NAME = 'MemoryOptimizedData' , FILENAME = 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData') | |
TO FILEGROUP MemoryOptimizedData; | |
GO | |
/**************************************************** | |
Configure database and expand data | |
****************************************************/ | |
SET STATISTICS IO, TIME OFF; | |
GO | |
SET XACT_ABORT, NOCOUNT ON; | |
GO | |
SET ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING, ANSI_WARNINGS, ARITHABORT, CONCAT_NULL_YIELDS_NULL ON; | |
GO | |
SET NUMERIC_ROUNDABORT OFF; | |
GO | |
ALTER DATABASE BabbyNames201711 SET QUERY_STORE = ON | |
GO | |
ALTER DATABASE BabbyNames201711 SET QUERY_STORE | |
(OPERATION_MODE = READ_WRITE, | |
CLEANUP_POLICY = (STALE_QUERY_THRESHOLD_DAYS = 365), MAX_STORAGE_SIZE_MB = 1024 | |
) | |
GO | |
ALTER DATABASE BabbyNames201711 SET COMPATIBILITY_LEVEL=140; | |
GO | |
ALTER DATABASE BabbyNames201711 SET TARGET_RECOVERY_TIME = 60 SECONDS; | |
GO | |
USE BabbyNames201711; | |
GO | |
EXEC evt.logme N'Restored small BabbyNames201711 database'; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF; | |
GO | |
ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF; | |
GO | |
/******************************************************/ | |
/* ref.Numbers */ | |
/******************************************************/ | |
/* Create ref.Numbers. This is a helper "numbers" table just to help us in the next step.*/ | |
IF SCHEMA_ID('ref') IS NULL | |
BEGIN | |
EXEC evt.logme N'Create schema ref.'; | |
EXEC ('CREATE SCHEMA ref AUTHORIZATION dbo'); | |
END | |
GO | |
EXEC evt.logme N'Create ref.Numbers.'; | |
GO | |
IF OBJECT_ID('ref.Numbers','U') IS NOT NULL | |
BEGIN | |
EXEC evt.logme N'Table ref.Numbers already exists, dropping.'; | |
DROP TABLE ref.Numbers; | |
END | |
GO | |
CREATE TABLE ref.Numbers ( | |
Num INT NOT NULL, | |
) on fg_FirstNameByBirthDate; | |
GO | |
EXEC evt.logme N'Load ref.Numbers.'; | |
GO | |
INSERT ref.Numbers | |
(Num) | |
SELECT TOP 10000000 | |
ROW_NUMBER() OVER (ORDER BY fn1.ReportYear) | |
FROM agg.FirstNameByYear AS fn1 | |
CROSS JOIN agg.FirstNameByYear AS fn2; | |
GO | |
EXEC evt.logme N'Index and key ref.Numbers.'; | |
GO | |
CREATE CLUSTERED COLUMNSTORE INDEX ccx_ref_Numbers on ref.Numbers; | |
GO | |
ALTER TABLE ref.Numbers | |
ADD CONSTRAINT pk_refNumbers_Num | |
PRIMARY KEY NONCLUSTERED (Num) | |
ON fg_FirstNameByBirthDate; | |
GO | |
/******************************************************/ | |
/* Helper index */ | |
/******************************************************/ | |
EXEC evt.logme N'Create nccx_halp ON agg.FirstNameByYear.'; | |
GO | |
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_halp on agg.FirstNameByYear | |
(FirstNameId, ReportYear, NameCount, Gender) | |
ON fg_FirstNameByBirthDate | |
GO | |
/******************************************************/ | |
/* Create and load dbo.FirstNameByBirthDate */ | |
/******************************************************/ | |
EXEC evt.logme N'Load dbo.FirstNameByBirthDateStage'; | |
GO | |
/* This select into gets parallel insert plan, 20170906. | |
We set the default filegroup above so this will go into fg_FirstNameByBirthDate, | |
I am just using the hint added in 2017 for future proofing. */ | |
SELECT | |
ISNULL(CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 'boop')) AS BIGINT),0) AS FirstNameByBirthDateId, | |
DATEADD(mi,n.Num * 5.1,CAST('1/1/' + CAST(ReportYear AS CHAR(4)) AS datetime2(0))) as FakeBirthDateStamp, | |
fn.StateCode, | |
fn.FirstNameId, | |
Gender, | |
CAST(NULL AS TINYINT) as Flag1, | |
CAST(NULL AS CHAR(1)) as Flag2 | |
INTO dbo.FirstNameByBirthDateStage ON fg_FirstNameByBirthDate /* This added in SQL Server 2017*/ | |
FROM agg.FirstNameByYearState AS fn | |
CROSS APPLY (select Num from ref.Numbers where Num <= fn.NameCount) AS n | |
WHERE fn.ReportYear >= 1970 /* Limit size of dataset here */ | |
OPTION (RECOMPILE); | |
GO | |
EXEC evt.logme N'Add BirthYear computed column to dbo.FirstNameByBirthDateStage'; | |
GO | |
ALTER TABLE dbo.FirstNameByBirthDateStage | |
ADD BirthYear as YEAR(FakeBirthDateStamp); | |
GO | |
EXEC evt.logme N'Create dbo.FirstNameByBirthDate, which has an identity property'; | |
GO | |
CREATE TABLE dbo.FirstNameByBirthDate ( | |
FirstNameByBirthDateId BIGINT IDENTITY(1,1), | |
FakeBirthDateStamp DATETIME2(0), | |
StateCode CHAR(2) NOT NULL, | |
FirstNameId INT NOT NULL, | |
Gender CHAR(1) NOT NULL, | |
Flag1 TINYINT NULL, | |
Flag2 CHAR(1) NULL, | |
BirthYear AS YEAR(FakeBirthDateStamp) | |
) ON fg_FirstNameByBirthDate | |
GO | |
EXEC evt.logme N'Switch data from dbo.FirstNameByBirthDateStage to dbo.FirstNameByBirthDate'; | |
GO | |
ALTER TABLE dbo.FirstNameByBirthDateStage SWITCH TO dbo.FirstNameByBirthDate; | |
GO | |
EXEC evt.logme N'DBCC CHECKIDENT RESEED for dbo.FirstNameByBirthDateStage'; | |
GO | |
DBCC CHECKIDENT ('dbo.FirstNameByBirthDate', RESEED); | |
GO | |
EXEC evt.logme N'DROP TABLE FirstNameByBirthDateStage'; | |
GO | |
DROP TABLE FirstNameByBirthDateStage; | |
GO | |
EXEC evt.logme N'Create clustered PK on dbo.FirstNameByBirthDate'; | |
GO | |
ALTER TABLE dbo.FirstNameByBirthDate | |
ADD CONSTRAINT pk_FirstNameByBirthDate_FirstNameByBirthDateId | |
PRIMARY KEY CLUSTERED (FirstNameByBirthDateId) | |
WITH (SORT_IN_TEMPDB = ON, | |
DATA_COMPRESSION = ROW) | |
ON fg_FirstNameByBirthDate; | |
GO | |
EXEC evt.logme N'Clean up nccx_halp ON agg.FirstNameByYear.'; | |
GO | |
DROP INDEX IF EXISTS nccx_halp ON agg.FirstNameByYear; | |
GO | |
/******************************************************/ | |
/* Partitioned table */ | |
/******************************************************/ | |
EXEC evt.logme N'CREATE SCHEMA pt AUTHORIZATION dbo'; | |
GO | |
CREATE SCHEMA pt AUTHORIZATION dbo; | |
GO | |
EXEC evt.logme N'CREATE PARTITION FUNCTION pf_fnbd from 1879 to 2020.'; | |
GO | |
declare @dsql nvarchar(max)=N' | |
CREATE PARTITION FUNCTION pf_fnbd (DATETIME2(0)) | |
AS RANGE RIGHT | |
FOR VALUES ( ' + ( | |
SELECT STUFF( | |
(SELECT ', ''' + CAST(Num as CHAR(4)) + '-01-01''' | |
FROM ref.Numbers | |
WHERE Num between 1959 and 2020 | |
FOR XML PATH('')) | |
,1,1,'') | |
)+N' | |
);' | |
EXEC (@dsql); | |
GO | |
EXEC evt.logme N'CREATE PARTITION SCHEME ps_fnbd.'; | |
GO | |
CREATE PARTITION SCHEME ps_fnbd | |
AS PARTITION pf_fnbd | |
ALL TO (fg_FirstNameByBirthDate); | |
GO | |
EXEC evt.logme N'CREATE table pt.FirstNameByBirthDate'; | |
GO | |
CREATE TABLE pt.FirstNameByBirthDate ( | |
FakeBirthDateStamp DATETIME2(0) NOT NULL, | |
FirstNameByBirthDateId BIGINT IDENTITY NOT NULL, | |
StateCode CHAR(2) NOT NULL, | |
FirstNameId INT NULL, | |
Gender CHAR(1) NULL, | |
Flag1 TINYINT NULL, | |
Flag2 CHAR(1) NULL, | |
BirthYear AS YEAR(FakeBirthDateStamp) | |
) ON ps_fnbd (FakeBirthDateStamp) | |
GO | |
EXEC evt.logme N'Insert data from dbo.FirstNameByBirthDate'; | |
GO | |
SET IDENTITY_INSERT pt.FirstNameByBirthDate ON; | |
GO | |
INSERT pt.FirstNameByBirthDate WITH (TABLOCK) | |
(FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2) | |
SELECT FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2 | |
FROM dbo.FirstNameByBirthDate | |
LEFT JOIN ref.Numbers as n on 1 = -1 /* pointless join to get batch mode on sort operator */ | |
; | |
GO | |
SET IDENTITY_INSERT pt.FirstNameByBirthDate OFF; | |
GO | |
EXEC evt.logme N'CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate'; | |
GO | |
CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate ON | |
pt.FirstNameByBirthDate (FirstNameByBirthDateId, FakeBirthDateStamp) | |
WITH (SORT_IN_TEMPDB = ON, | |
DATA_COMPRESSION = ROW) | |
ON ps_fnbd (FakeBirthDateStamp); | |
GO | |
EXEC evt.logme N'DBCC CHECKIDENT RESEED for pt.FirstNameByBirthDateStage'; | |
GO | |
DBCC CHECKIDENT ('pt.FirstNameByBirthDate', RESEED); | |
GO | |
/******************************************************/ | |
/* Foreign key constraints */ | |
/******************************************************/ | |
EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_FirstNameId'; | |
GO | |
ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK | |
ADD CONSTRAINT FK_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId) | |
REFERENCES ref.FirstName (FirstNameId); | |
GO | |
EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_FirstNameId'; | |
GO | |
ALTER TABLE pt.FirstNameByBirthDate WITH CHECK | |
ADD CONSTRAINT FK_pt_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId) | |
REFERENCES ref.FirstName (FirstNameId); | |
GO | |
EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_StateCode'; | |
GO | |
ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK | |
ADD CONSTRAINT FK_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode) | |
REFERENCES ref.State (StateCode); | |
GO | |
EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_StateCode'; | |
GO | |
ALTER TABLE pt.FirstNameByBirthDate WITH CHECK | |
ADD CONSTRAINT FK_pt_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode) | |
REFERENCES ref.State (StateCode); | |
GO | |
/******************************************************/ | |
/* Indexes */ | |
/******************************************************/ | |
CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_BirthYear | |
ON dbo.FirstNameByBirthDate ( BirthYear ASC ); | |
GO | |
CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_BirthYear | |
ON pt.FirstNameByBirthDate ( BirthYear ASC ); | |
GO | |
CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_FirstNameId | |
ON dbo.FirstNameByBirthDate ( FirstNameId ASC ); | |
GO | |
CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_FirstNameId | |
ON pt.FirstNameByBirthDate ( FirstNameId ASC ); | |
GO | |
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_pt_FirstNameByBirthDate | |
ON pt.FirstNameByBirthDate� | |
( FakeBirthDateStamp, FirstNameByBirthDateId, StateCode, FirstNameId, Gender, Flag1, Flag2); | |
GO | |
/******************************************************/ | |
/* Backup */ | |
/******************************************************/ | |
EXEC evt.logme N'Run a full backup'; | |
GO | |
BACKUP DATABASE BabbyNames201711 | |
TO DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak', | |
DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak', | |
DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak', | |
DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak' | |
WITH INIT, COMPRESSION, STATS=5; | |
GO | |
/* Restore command (for reference) */ | |
--use master; | |
--GO | |
--IF DB_ID('BabbyNames201711') IS NOT NULL | |
--BEGIN | |
-- ALTER DATABASE BabbyNames201711 | |
-- SET SINGLE_USER | |
-- WITH ROLLBACK IMMEDIATE; | |
--END | |
--GO | |
--RESTORE DATABASE BabbyNames201711 | |
-- FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak', | |
-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak', | |
-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak', | |
-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak' | |
-- WITH | |
-- MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf', | |
-- MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf', | |
-- REPLACE, | |
-- RECOVERY; | |
--GO | |
/******************************************************/ | |
/* All done */ | |
/******************************************************/ | |
EXEC evt.logme N'BEEP BOOP WE ARE DONE'; | |
GO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***************************************************************************** | |
Copyright (c) 2017 SQL Workbooks LLC | |
Terms of Use: https://sqlworkbooks.com/terms-of-service/ | |
Contact: help@sqlworkbooks.com | |
The script restores the database created and configured in 00-populate-BabbyNames201711.sql | |
If you've just run 00-populate-BabbyNames201711.sql, you don't have to run this. | |
If you want to reset your environment without rerunning 00-populate-BabbyNames201711.sql, | |
modify the file locations here to restore the database. | |
*****************************************************************************/ | |
SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON; | |
GO | |
EXEC sp_configure 'show advanced options', 1; | |
GO | |
RECONFIGURE | |
GO | |
EXEC sp_configure 'max degree of parallelism', 4; | |
GO | |
EXEC sp_configure 'cost threshold for parallelism', 100 | |
GO | |
EXEC sp_configure 'max server memory (MB)', 7000; | |
GO | |
RECONFIGURE | |
GO | |
/**************************************************** | |
Restore database | |
****************************************************/ | |
use master; | |
GO | |
IF DB_ID('BabbyNames201711') IS NOT NULL | |
BEGIN | |
IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE' | |
ALTER DATABASE BabbyNames201711 | |
SET SINGLE_USER | |
WITH ROLLBACK IMMEDIATE; | |
END | |
GO | |
/* What can I say, I like to carefully lay out my files.*/ | |
RESTORE DATABASE BabbyNames201711 | |
FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak', | |
DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak', | |
DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak', | |
DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak' | |
WITH | |
MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf', | |
MOVE 'fg_FirstNameByBirthDate_f1' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf', | |
MOVE 'fg_FirstNameByBirthDate_f2' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf', | |
MOVE 'fg_FirstNameByBirthDate_f3' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf', | |
MOVE 'fg_FirstNameByBirthDate_f4' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf', | |
MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf', | |
MOVE 'MemoryOptimizedData' TO 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData', | |
REPLACE, | |
RECOVERY; | |
GO |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***************************************************************************** | |
Copyright (c) 2017 SQL Workbooks LLC | |
Terms of Use: https://sqlworkbooks.com/terms-of-service/ | |
Contact: help@sqlworkbooks.com | |
*****************************************************************************/ | |
RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG; | |
GO | |
USE BabbyNames201711; | |
GO | |
SET NOCOUNT ON; | |
GO | |
/********************************************************/ | |
/* Problem Query: The aggregator */ | |
/********************************************************/ | |
/* Run these with actual plans enabled. | |
Compare the estimated cost (CTP is set at 100) | |
Compare the query time stats | |
Show why the columnstore index is drunk here. | |
*/ | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM dbo.FirstNameByBirthDate | |
WHERE BirthYear BETWEEN 2001 and 2015 | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC; | |
GO | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM pt.FirstNameByBirthDate | |
WHERE BirthYear BETWEEN 2001 and 2015 | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC; | |
GO | |
/* OK, let's just not use the columnstore. | |
Let's use the partitioned rowstore index on BirthYear. | |
In my version/compat level, I don't get batch mode with this hint. | |
Are we getting partition elimination or segment elimination? | |
*/ | |
SET STATISTICS IO, TIME ON; | |
GO | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM pt.FirstNameByBirthDate | |
WHERE BirthYear BETWEEN 2001 and 2015 | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC | |
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX); | |
GO | |
SET STATISTICS IO, TIME OFF; | |
GO | |
/* It's still slower! | |
Example runtimes... | |
Non-partitioned rowstore index on BirthYear: CPU time = 5,157 ms, elapsed time = 1,662 ms. | |
Partitioned rowstore index on BirthYear: CPU time = 10,547 ms, elapsed time = 2,939 ms. | |
Partitioned columnstore index re-computing BirthYear: CPU time = 17,563 ms, elapsed time = 18,094 ms. | |
*/ | |
/* Head back to the slides to explain why the partitioned index is different */ | |
/************************************************************************* | |
Fixes | |
*************************************************************************/ | |
/*************************** | |
FIX 1 - NONALIGNED INDEX | |
***************************/ | |
/* We can create a "non-aligned", non-partitioned index on our partitioned | |
table. Just specify a filegroup rather than a partition scheme */ | |
/* I'm giving it a short (terrible) name just to make it easy to identify in the execution plan */ | |
/* This takes ~2.5 minutes */ | |
CREATE INDEX nonaligned | |
on pt.FirstNameByBirthDate (BirthYear) | |
WITH (SORT_IN_TEMPDB = ON) | |
ON [fg_FirstNameByBirthDate]; | |
GO | |
/* Does SQL Server want to use 'nonaligned'? */ | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM pt.FirstNameByBirthDate | |
WHERE BirthYear BETWEEN 2001 and 2015 | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC | |
GO | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM pt.FirstNameByBirthDate | |
WHERE BirthYear BETWEEN 2001 and 2015 | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC | |
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX); | |
GO | |
/* | |
So in our case we have to add a hint (due to that pesky computed column!) | |
And we've lost the ability to do partition level operations: | |
switch any partition in | |
switch any partition out | |
truncate any partition | |
We have to drop or disable all non-aligned indexes to do ANY partition level operation. | |
*/ | |
TRUNCATE TABLE pt.FirstNameByBirthDate | |
WITH (PARTITIONS (1 TO 4)); | |
GO | |
DROP INDEX IF EXISTS nonaligned | |
ON pt.FirstNameByBirthDate; | |
GO | |
/*************************** | |
FIX 1 - NOT SO GREAT. | |
***************************/ | |
/*************************** | |
FIX 2 - QUERY REWRITE TO GET | |
PARTION ELIMINATION | |
***************************/ | |
/* We can rewrite the query to get partition elimination. | |
We're still going to have to do the hash aggregate, but we'll | |
do it for fewer partitions */ | |
/* Run with actual plans on. | |
Look at how many actual partitions were used in the plan. | |
Confirm the number of rowgroups read in stats io output*/ | |
SET STATISTICS IO, TIME ON; | |
GO | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM pt.FirstNameByBirthDate | |
WHERE | |
FakeBirthDateStamp >= CAST('2001-01-01' AS DATETIME2(0)) and | |
FakeBirthDateStamp < CAST('2016-01-01' AS DATETIME2(0)) | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC; | |
GO | |
SET STATISTICS IO, TIME OFF; | |
GO | |
/* It's still slower! Look at the plan to see why... | |
Rewritten query / partitioned columnstore index: CPU time = 5,734 ms, elapsed time = 5,894 ms. | |
Non-partitioned rowstore index on BirthYear: CPU time = 5,157 ms, elapsed time = 1,662 ms. | |
Partitioned rowstore index on BirthYear: CPU time = 10,547 ms, elapsed time = 2,939 ms. | |
Partitioned columnstore index re-computing BirthYear: CPU time = 17,563 ms, elapsed time = 18,094 ms. | |
*/ | |
/* We're leveraging the partitioning key now, so we are getting partition elimination. | |
However, this is still getting costed very low, and it's going single threaded. | |
It seems to be under-costing the work that compute scalar's doing to re-calc BirthYear to me. | |
*/ | |
/* We could try to force parallelism using undocumented TF 8649. | |
Or on SQL Server 2016 SP1 CU2+ and SQL Server 2017, we could use undocumented hint ENABLE_PARALLEL_PLAN_PREFERENCE | |
http://www.queryprocessor.com/how-to-get-a-parallel-plan/ (Dmitry Pilugin) | |
*/ | |
/* Notice that it's not in this list... */ | |
SELECT * FROM sys.dm_exec_valid_use_hints; | |
GO | |
SET STATISTICS IO, TIME ON; | |
GO | |
SELECT | |
BirthYear, | |
COUNT(*) as NameCount | |
FROM pt.FirstNameByBirthDate | |
WHERE | |
FakeBirthDateStamp >= CAST('2001-01-01' AS DATETIME2(0)) and | |
FakeBirthDateStamp < CAST('2016-01-01' AS DATETIME2(0)) | |
GROUP BY BirthYear | |
ORDER BY COUNT(*) DESC | |
OPTION (USE HINT ('ENABLE_PARALLEL_PLAN_PREFERENCE')) | |
GO | |
SET STATISTICS IO, TIME OFF; | |
GO | |
/* It's still slower! Look at the plan to see why... | |
Rewritten query / partitioned columnstore index: CPU time = 5,734 ms, elapsed time = 5,894 ms. | |
Rewritten query with hint forcing parallelism: CPU time = 7,188 ms, elapsed time = 1,907 ms. | |
Non-partitioned rowstore index on BirthYear: CPU time = 5,157 ms, elapsed time = 1,662 ms. | |
Partitioned rowstore index on BirthYear: CPU time = 10,547 ms, elapsed time = 2,939 ms. | |
Partitioned columnstore index re-computing BirthYear: CPU time = 17,563 ms, elapsed time = 18,094 ms. | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***************************************************************************** | |
Copyright (c) 2017 SQL Workbooks LLC | |
Terms of Use: https://sqlworkbooks.com/terms-of-service/ | |
Contact: help@sqlworkbooks.com | |
*****************************************************************************/ | |
RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG; | |
GO | |
USE BabbyNames201711; | |
GO | |
/********************************************************/ | |
/* Problem: MIN / MAX get slow */ | |
/********************************************************/ | |
/* Run these with actual plans enabled. | |
Note the differences between the plans. | |
It knows the query against the partitioned table will be more expensive. | |
Look at elapsed time in each plan. | |
Query 1: look at the seek operator and explain why that was fast. | |
Query 2: How many partitions were actually used? | |
*/ | |
/* Why is the second query slower? */ | |
SET STATISTICS IO, TIME ON; | |
GO | |
SELECT MAX(FirstNameId) AS max_val | |
FROM dbo.FirstNameByBirthDate | |
GO | |
SELECT MAX(FirstNameId) AS max_val | |
FROM pt.FirstNameByBirthDate with (index (pt_FirstNameByBirthDate_FirstNameId)) | |
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX) | |
GO | |
EXEC sp_helpindex 'pt.FirstNameByBirthDate' | |
SET STATISTICS IO, TIME OFF; | |
GO | |
/* | |
Non-partitioned rowstore index on FirstNameId: CPU time = 0 ms, elapsed time = 0 ms. | |
Partitioned rowstore index on FirstNameId: CPU time = 26,530 ms, elapsed time = 8,110 ms. | |
*/ | |
/* MIN has the same issue (although of course a different resulting value) */ | |
SELECT MIN(FirstNameId) AS min_val | |
FROM pt.FirstNameByBirthDate | |
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX) | |
GO | |
/* To the slides, to talk this through! */ | |
/************************************************************************* | |
Fixes | |
*************************************************************************/ | |
/*************************** | |
FIX 1 - Non-Aligned Index | |
***************************/ | |
/* We can create a "non-aligned", non-partitioned index on our partitioned | |
table. Just specify a filegroup rather than a partition scheme */ | |
/* This takes ~2 minutes to create */ | |
CREATE INDEX nonaligned | |
on pt.FirstNameByBirthDate (FirstNameId) | |
WITH (SORT_IN_TEMPDB = ON) | |
ON [fg_FirstNameByBirthDate]; | |
GO | |
/* Now we get the "non-partitioned" plan .... */ | |
SELECT MAX(FirstNameId) AS max_val | |
FROM pt.FirstNameByBirthDate; | |
GO | |
/* But we've lost the ability to do partition level operations: | |
switch any partition in | |
switch any partition out | |
truncate any partition | |
We have to drop or disable all non-aligned indexes to do ANY partition level operation. | |
*/ | |
TRUNCATE TABLE pt.FirstNameByBirthDate | |
WITH (PARTITIONS (1 TO 4)); | |
GO | |
DROP INDEX IF EXISTS nonaligned | |
ON pt.FirstNameByBirthDate; | |
GO | |
/*************************** | |
FIX 2 - Query rewrite | |
***************************/ | |
/* OK, let's practice the recommended workaround from the Connect Item */ | |
/* Our solution relies on the built-in $partition function. | |
This computes which partition data is in. | |
Here's a simple example using it for just one row. */ | |
SELECT $partition.pf_fnbd(FakeBirthDateStamp) as partition_number, | |
FakeBirthDateStamp, | |
FirstNameByBirthDateId, | |
BirthYear, | |
FirstNameId, | |
Gender | |
FROM pt.FirstNameByBirthDate | |
WHERE FirstNameByBirthDateId = 12225838 | |
GO | |
/* Testing out the pattern in | |
https://connect.microsoft.com/SQLServer/feedback/details/240968/partition-table-using-min-max-functions-and-top-n-index-selection-and-performance */ | |
/* We're just looking at partition #s 41, 42, 43 here */ | |
/* We can use the function in interesting ways. | |
Look at the plan for this query... it does a very efficient backward scan in each partition */ | |
SELECT MAX(max_val) | |
FROM | |
( VALUES (41), (42), (43) ) as partitiontable(num) /* 3 row table from table value constructor */ | |
CROSS APPLY | |
(SELECT MAX(FirstNameId) as max_val | |
FROM pt.FirstNameByBirthDate | |
/* CROSS APPLY lets us join to the table value constructor in here */ | |
WHERE $partition.pf_fnbd(FakeBirthDateStamp) = partitiontable.num | |
) AS o; | |
GO | |
/* What if we have a changing number of partitions over time? */ | |
/* We can use the partition function "fanout" and a numbers | |
table to construct a table with one row for each partition number, like this */ | |
SELECT n.Num | |
FROM sys.partition_functions AS pf | |
JOIN ref.Numbers as n on n.Num <= pf.fanout | |
WHERE | |
pf.name='pf_fnbd'; | |
GO | |
/* So we can get an automatic peek into each partition which has rows using this... */ | |
/* Note: joining to a system table prevents parallelism, so I'm doing this as a two-step | |
query and putting the value for @fanout into a variable */ | |
SET STATISTICS IO, TIME ON; | |
GO | |
DECLARE @fanout int | |
SELECT @fanout = fanout | |
FROM sys.partition_functions pf | |
WHERE pf.name='pf_fnbd'; | |
SELECT MAX(max_val)AS max_val | |
FROM ( | |
SELECT Num | |
FROM ref.Numbers | |
WHERE Num <= @fanout | |
) as partitiontable(num) | |
CROSS APPLY | |
(select MAX(FirstNameId) as max_val | |
from pt.FirstNameByBirthDate | |
where $partition.pf_fnbd(FakeBirthDateStamp) = partitiontable.num | |
) as o; | |
GO | |
SET STATISTICS IO, TIME OFF; | |
GO | |
/* | |
Rewritten query for partitioned index on FirstNameId CPU time = 31 ms, elapsed time = 8 ms. | |
Non-partitioned rowstore index on FirstNameId: CPU time = 0 ms, elapsed time = 0 ms. | |
Partitioned rowstore index on FirstNameId: CPU time = 26,530 ms, elapsed time = 8,110 ms. | |
*/ | |
/* The TSQL is not pretty. | |
Don't like it? | |
Vote up the bug! | |
https://connect.microsoft.com/SQLServer/feedback/details/240968/partition-table-using-min-max-functions-and-top-n-index-selection-and-performance | |
*/ | |
/*************************** | |
FIX 3 - Columnstore | |
***************************/ | |
SET STATISTICS IO, TIME ON; | |
GO | |
SELECT MAX(FirstNameId) AS max_val | |
FROM pt.FirstNameByBirthDate; | |
GO | |
SET STATISTICS IO, TIME OFF; | |
GO | |
/* Enable actual plan. | |
Look at "Locally Aggregated Rows" on properties of scan. | |
Bonus: look at the number of rows coming out of the Columnstore Index Scan. | |
Talk about why that makes sense (it does!) | |
*/ | |
SELECT MAX(FirstNameId) AS max_val | |
FROM pt.FirstNameByBirthDate; | |
GO | |
/* | |
Partitioned columnstore CPU time = 406 ms, elapsed time = 407 ms. | |
Rewritten query for partitioned index on FirstNameId CPU time = 31 ms, elapsed time = 8 ms. | |
Non-partitioned rowstore index on FirstNameId: CPU time = 0 ms, elapsed time = 0 ms. | |
Partitioned rowstore index on FirstNameId: CPU time = 26,530 ms, elapsed time = 8,110 ms. | |
*/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/***************************************************************************** | |
Copyright (c) 2017 SQL Workbooks LLC | |
Terms of Use: https://sqlworkbooks.com/terms-of-service/ | |
Contact: help@sqlworkbooks.com | |
*****************************************************************************/ | |
RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG; | |
GO | |
/********************************************************/ | |
/* Problem: Unexpected blocking */ | |
/********************************************************/ | |
/* Start this query in another session. | |
This will take out a lock on the partition holding 2015 data */ | |
USE BabbyNames201711; | |
GO | |
BEGIN TRAN | |
DECLARE @updateval DATETIME2(0)='2010-01-01 03:49:00' | |
UPDATE pt.FirstNameByBirthDate | |
SET Flag1 = 1 | |
WHERE FakeBirthDateStamp=@updateval | |
and FirstNameId = 67092; | |
/* This query should only need to read from one partition. | |
Start it up in this session. */ | |
USE BabbyNames201711; | |
GO | |
SELECT COUNT(*) | |
FROM pt.FirstNameByBirthDate | |
WHERE FakeBirthDateStamp >= '2000-01-01 00:00:00.0' | |
AND FakeBirthDateStamp < '2000-01-02 00:00:00.0'; | |
GO | |
/* Confirm that it is blocked in a third session. | |
sp_WhoIsActive is a free procedure written by Adam Machanic. | |
Get it at WhoIsActive.com | |
Why is it blocked? | |
*/ | |
exec sp_WhoIsActive @get_plans=1; | |
GO | |
/* Look at the predicate on the columnstore operator in the plan | |
There's a convert_implicit | |
And there's @1 and @1 | |
Cancel the blocked query | |
*/ | |
/* This is a simple query, so SQL Server is automatically parameterizing those dates. | |
It sees it needs to make them a DATETIME2, but it defaults to making them DATETIME2(7) | |
The column in the table is a DATETIME2(0). | |
DATETIME2(7) has a higher precedence than DATETIME2(0) | |
SQL Server has to up-sample EVERY SINGLE ROW in the table to DATETIME2(7) to compare values. | |
*/ | |
/* Get an estimated plan for this corrected query. | |
Look at the seek predicate which magically appeared. | |
Then run the query and look at the actual plan / | |
actual partitions accessed. */ | |
SELECT COUNT(*) | |
FROM pt.FirstNameByBirthDate | |
WHERE FakeBirthDateStamp > CAST('2000-01-01 00:00:00.0' AS DATETIME2(0)) | |
AND FakeBirthDateStamp <= CAST('2000-01-02 00:00:00.0' AS DATETIME2(0)) | |
GO | |
/* This also works fine */ | |
DECLARE @FakeBirthDateStampStart DATETIME2(0) = '2000-01-01 00:00:00.0', | |
@FakeBirthDateStampEnd DATETIME2(0) = '2000-01-02 00:00:00.0'; | |
SELECT COUNT(*) | |
FROM pt.FirstNameByBirthDate | |
WHERE FakeBirthDateStamp > @FakeBirthDateStampStart | |
AND FakeBirthDateStamp <= @FakeBirthDateStampEnd | |
GO | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment