LitKnd/00-populate-BabbyNames201711.sql Secret

## 00-populate-BabbyNames201711.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com

Setup:
    Download BabbyNames.bak.zip (41 MB zipped database backup)
    https://github.com/LitKnd/BabbyNames/releases/tag/v1.1

Then review and run the script below on a SQL Server 2017 dedicated test instance
    Developer Edition recommended (Enteprise and Evaluation Editions will work too)

The script
    Restores the database (edit the file locations for your instance)
    Expands and modifies the data
        21GB data files (multiple files in a couple of filegroups)
        3GB log file
    Duration on my test instance (4 vCPUs, SSDs): ~28 minutes

This requires some tempdb space to do sorts, etc. I ran with 4x 1GB tempdb files.

*****************************************************************************/

SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
GO

EXEC sp_configure 'show advanced options', 1;
GO
RECONFIGURE
GO

EXEC sp_configure 'max degree of parallelism', 4;
GO

EXEC sp_configure 'cost threshold for parallelism', 100
GO

EXEC sp_configure 'max server memory (MB)', 7000;
GO

RECONFIGURE
GO


/****************************************************
Restore small BabbyNames database
****************************************************/
use master;
GO

IF DB_ID('BabbyNames201711') IS NOT NULL
BEGIN
    IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE'
    ALTER DATABASE BabbyNames201711
        SET SINGLE_USER
        WITH ROLLBACK IMMEDIATE;
END
GO

RESTORE DATABASE BabbyNames201711
    FROM DISK=N'S:\MSSQL\Backup\BabbyNames.bak'
    WITH
        MOVE 'BabbyNames' TO 'S:\MSSQL\DATA\BabbyNames201711.mdf',
        MOVE 'BabbyNames_log' TO 'S:\MSSQL\DATA\BabbyNames201711_log.ldf',
        REPLACE,
        RECOVERY;
GO

ALTER DATABASE BabbyNames201711 SET RECOVERY SIMPLE;
GO


ALTER DATABASE BabbyNames201711
    ADD FILEGROUP fg_FirstNameByBirthDate;
GO


ALTER DATABASE BabbyNames201711 ADD FILE
(   NAME = fg_FirstNameByBirthDate_f1,
    FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf',
    SIZE = 5GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

ALTER DATABASE BabbyNames201711 ADD FILE
(   NAME = fg_FirstNameByBirthDate_f2,
    FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf',
    SIZE = 5GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

ALTER DATABASE BabbyNames201711 ADD FILE
(   NAME = fg_FirstNameByBirthDate_f3,
    FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf',
    SIZE = 5GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

ALTER DATABASE BabbyNames201711 ADD FILE
(   NAME = fg_FirstNameByBirthDate_f4,
    FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf',
    SIZE = 5GB,
    FILEGROWTH = 512MB
) TO FILEGROUP fg_FirstNameByBirthDate;
GO

/* Make fg_FirstNameByBirthDate the default filegroup */
ALTER DATABASE BabbyNames201711 MODIFY FILEGROUP fg_FirstNameByBirthDate DEFAULT;
GO

ALTER DATABASE BabbyNames201711 MODIFY FILE (NAME='BabbyNames_log', SIZE=3GB, FILEGROWTH=512MB);
GO

/* just in case you want to play around with Hekaton... */
ALTER DATABASE BabbyNames201711 ADD FILEGROUP MemoryOptimizedData CONTAINS MEMORY_OPTIMIZED_DATA;
GO

ALTER DATABASE BabbyNames201711
    ADD FILE( NAME = 'MemoryOptimizedData' , FILENAME = 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData')
    TO FILEGROUP MemoryOptimizedData;
GO


/****************************************************
Configure database and expand data
****************************************************/

SET STATISTICS IO, TIME OFF;
GO
SET XACT_ABORT, NOCOUNT ON;
GO
SET ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING, ANSI_WARNINGS, ARITHABORT, CONCAT_NULL_YIELDS_NULL ON;
GO
SET NUMERIC_ROUNDABORT OFF;
GO


ALTER DATABASE BabbyNames201711 SET QUERY_STORE = ON
GO
ALTER DATABASE BabbyNames201711 SET QUERY_STORE
    (OPERATION_MODE = READ_WRITE,
    CLEANUP_POLICY = (STALE_QUERY_THRESHOLD_DAYS = 365), MAX_STORAGE_SIZE_MB = 1024
    )
GO


ALTER DATABASE BabbyNames201711 SET COMPATIBILITY_LEVEL=140;
GO

ALTER DATABASE BabbyNames201711 SET TARGET_RECOVERY_TIME = 60 SECONDS;
GO

USE BabbyNames201711;
GO

EXEC evt.logme N'Restored small BabbyNames201711 database';
GO

ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
GO

ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
GO


/******************************************************/
/* ref.Numbers                                        */
/******************************************************/

/* Create ref.Numbers. This is a helper "numbers" table just to help us in the next step.*/
IF SCHEMA_ID('ref') IS NULL
BEGIN
    EXEC evt.logme N'Create schema ref.';

    EXEC ('CREATE SCHEMA ref AUTHORIZATION dbo');
END
GO

EXEC evt.logme N'Create ref.Numbers.';
GO
IF OBJECT_ID('ref.Numbers','U') IS NOT NULL
BEGIN
    EXEC evt.logme N'Table ref.Numbers already exists, dropping.';

    DROP TABLE ref.Numbers;
END
GO

CREATE TABLE ref.Numbers (
    Num INT NOT NULL,
) on fg_FirstNameByBirthDate;
GO

EXEC evt.logme N'Load ref.Numbers.';
GO
INSERT ref.Numbers
    (Num)
SELECT TOP 10000000
    ROW_NUMBER() OVER (ORDER BY fn1.ReportYear)
FROM agg.FirstNameByYear AS fn1
CROSS JOIN agg.FirstNameByYear AS fn2;
GO

EXEC evt.logme N'Index and key ref.Numbers.';
GO

CREATE CLUSTERED COLUMNSTORE INDEX ccx_ref_Numbers on ref.Numbers;
GO

ALTER TABLE ref.Numbers
    ADD CONSTRAINT pk_refNumbers_Num
        PRIMARY KEY NONCLUSTERED (Num)
        ON fg_FirstNameByBirthDate;
GO


/******************************************************/
/* Helper index                                       */
/******************************************************/

EXEC evt.logme N'Create nccx_halp ON agg.FirstNameByYear.';
GO

CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_halp on agg.FirstNameByYear
    (FirstNameId, ReportYear, NameCount, Gender)
    ON fg_FirstNameByBirthDate
GO


/******************************************************/
/* Create and load dbo.FirstNameByBirthDate           */
/******************************************************/

EXEC evt.logme N'Load dbo.FirstNameByBirthDateStage';
GO

/* This select into gets parallel insert plan, 20170906.
We set the default filegroup above so this will go into fg_FirstNameByBirthDate,
I am just using the hint added in 2017 for future proofing. */
SELECT
    ISNULL(CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 'boop')) AS BIGINT),0) AS FirstNameByBirthDateId,
    DATEADD(mi,n.Num * 5.1,CAST('1/1/' + CAST(ReportYear AS CHAR(4)) AS datetime2(0))) as FakeBirthDateStamp,
    fn.StateCode,
    fn.FirstNameId,
    Gender,
    CAST(NULL AS TINYINT) as Flag1,
    CAST(NULL AS CHAR(1)) as Flag2
INTO dbo.FirstNameByBirthDateStage  ON fg_FirstNameByBirthDate /* This added in SQL Server 2017*/
FROM agg.FirstNameByYearState AS fn
CROSS APPLY (select Num from ref.Numbers where Num <= fn.NameCount) AS n
WHERE fn.ReportYear >= 1970 /* Limit size of dataset here */
    OPTION (RECOMPILE);
GO

EXEC evt.logme N'Add BirthYear computed column to  dbo.FirstNameByBirthDateStage';
GO

ALTER TABLE dbo.FirstNameByBirthDateStage
    ADD BirthYear as YEAR(FakeBirthDateStamp);
GO

EXEC evt.logme N'Create dbo.FirstNameByBirthDate, which has an identity property';
GO


CREATE TABLE dbo.FirstNameByBirthDate (
    FirstNameByBirthDateId BIGINT IDENTITY(1,1),
    FakeBirthDateStamp DATETIME2(0),
    StateCode CHAR(2) NOT NULL,
    FirstNameId INT NOT NULL,
    Gender CHAR(1) NOT NULL,
    Flag1 TINYINT NULL,
    Flag2 CHAR(1) NULL,
    BirthYear AS YEAR(FakeBirthDateStamp)
) ON fg_FirstNameByBirthDate
GO


EXEC evt.logme N'Switch data from dbo.FirstNameByBirthDateStage to dbo.FirstNameByBirthDate';
GO

ALTER TABLE dbo.FirstNameByBirthDateStage SWITCH TO dbo.FirstNameByBirthDate;
GO


EXEC evt.logme N'DBCC CHECKIDENT RESEED for dbo.FirstNameByBirthDateStage';
GO

DBCC CHECKIDENT ('dbo.FirstNameByBirthDate', RESEED);
GO


EXEC evt.logme N'DROP TABLE FirstNameByBirthDateStage';
GO
DROP TABLE FirstNameByBirthDateStage;
GO

EXEC evt.logme N'Create clustered PK on dbo.FirstNameByBirthDate';
GO

ALTER TABLE dbo.FirstNameByBirthDate
    ADD CONSTRAINT pk_FirstNameByBirthDate_FirstNameByBirthDateId
        PRIMARY KEY CLUSTERED (FirstNameByBirthDateId)
	WITH (SORT_IN_TEMPDB = ON,
        DATA_COMPRESSION = ROW)
    ON fg_FirstNameByBirthDate;
GO

EXEC evt.logme N'Clean up nccx_halp ON agg.FirstNameByYear.';
GO
DROP INDEX IF EXISTS nccx_halp ON agg.FirstNameByYear;
GO


/******************************************************/
/* Partitioned table                                  */
/******************************************************/
EXEC evt.logme N'CREATE SCHEMA pt AUTHORIZATION dbo';
GO
CREATE SCHEMA pt AUTHORIZATION dbo;
GO

EXEC evt.logme N'CREATE PARTITION FUNCTION pf_fnbd from 1879 to 2020.';
GO
declare @dsql nvarchar(max)=N'
CREATE PARTITION FUNCTION pf_fnbd (DATETIME2(0))
    AS RANGE RIGHT
    FOR VALUES ( ' + (
        SELECT STUFF(
            (SELECT ', ''' + CAST(Num as CHAR(4)) + '-01-01'''
            FROM ref.Numbers
            WHERE Num between 1959 and 2020
            FOR XML PATH(''))
        ,1,1,'')
)+N'
        );'

EXEC (@dsql);
GO

EXEC evt.logme N'CREATE PARTITION SCHEME ps_fnbd.';
GO
CREATE PARTITION SCHEME ps_fnbd
    AS PARTITION pf_fnbd
    ALL TO (fg_FirstNameByBirthDate);
GO

EXEC evt.logme N'CREATE table pt.FirstNameByBirthDate';
GO
CREATE TABLE pt.FirstNameByBirthDate (
	FakeBirthDateStamp DATETIME2(0) NOT NULL,
	FirstNameByBirthDateId BIGINT IDENTITY NOT NULL,
    StateCode CHAR(2) NOT NULL,
	FirstNameId INT NULL,
	Gender CHAR(1) NULL,
    Flag1 TINYINT NULL,
    Flag2 CHAR(1) NULL,
    BirthYear AS YEAR(FakeBirthDateStamp)
) ON ps_fnbd (FakeBirthDateStamp)
GO


EXEC evt.logme N'Insert data from dbo.FirstNameByBirthDate';
GO
SET IDENTITY_INSERT pt.FirstNameByBirthDate ON;
GO


INSERT pt.FirstNameByBirthDate WITH (TABLOCK)
    (FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2)
SELECT FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2
FROM dbo.FirstNameByBirthDate
LEFT JOIN ref.Numbers as n on 1 = -1 /* pointless join to get batch mode on sort operator */
;
GO

SET IDENTITY_INSERT pt.FirstNameByBirthDate OFF;
GO


EXEC evt.logme N'CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate';
GO
CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate ON
    pt.FirstNameByBirthDate (FirstNameByBirthDateId, FakeBirthDateStamp)
	WITH (SORT_IN_TEMPDB = ON,
        DATA_COMPRESSION = ROW)
    ON ps_fnbd (FakeBirthDateStamp);
GO


EXEC evt.logme N'DBCC CHECKIDENT RESEED for pt.FirstNameByBirthDateStage';
GO

DBCC CHECKIDENT ('pt.FirstNameByBirthDate', RESEED);
GO


/******************************************************/
/* Foreign key constraints                            */
/******************************************************/

EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_FirstNameId';
GO

ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId)
    REFERENCES ref.FirstName (FirstNameId);
GO


EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_FirstNameId';
GO

ALTER TABLE pt.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_pt_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId)
    REFERENCES ref.FirstName (FirstNameId);
GO

EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_StateCode';
GO

ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode)
    REFERENCES ref.State (StateCode);
GO

EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_StateCode';
GO

ALTER TABLE pt.FirstNameByBirthDate WITH CHECK
ADD CONSTRAINT FK_pt_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode)
    REFERENCES ref.State (StateCode);
GO


/******************************************************/
/* Indexes                                            */
/******************************************************/

CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_BirthYear
	ON dbo.FirstNameByBirthDate ( BirthYear ASC );
GO

CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_BirthYear
	ON pt.FirstNameByBirthDate ( BirthYear ASC );
GO

CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_FirstNameId
	ON dbo.FirstNameByBirthDate ( FirstNameId ASC );
GO

CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_FirstNameId
	ON pt.FirstNameByBirthDate ( FirstNameId ASC );
GO

CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_pt_FirstNameByBirthDate
	ON pt.FirstNameByBirthDate�
( FakeBirthDateStamp, FirstNameByBirthDateId, StateCode, FirstNameId, Gender, Flag1, Flag2);
GO


/******************************************************/
/*  Backup                                            */
/******************************************************/

EXEC evt.logme N'Run a full backup';
GO


BACKUP DATABASE BabbyNames201711
    TO DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
    WITH INIT, COMPRESSION, STATS=5;
GO


/* Restore command (for reference) */
--use master;
--GO

--IF DB_ID('BabbyNames201711') IS NOT NULL
--BEGIN
--    ALTER DATABASE BabbyNames201711
--        SET SINGLE_USER
--        WITH ROLLBACK IMMEDIATE;
--END
--GO

--RESTORE DATABASE BabbyNames201711
--    FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
--    DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
--    DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
--    DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
--    WITH
--        MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf',
--        MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf',
--        REPLACE,
--        RECOVERY;
--GO


/******************************************************/
/* All done                                           */
/******************************************************/

EXEC evt.logme N'BEEP BOOP WE ARE DONE';
GO

## 01-restore.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com

The script restores the database created and configured in 00-populate-BabbyNames201711.sql

If you've just run 00-populate-BabbyNames201711.sql, you don't have to run this.

If you want to reset your environment without rerunning 00-populate-BabbyNames201711.sql,
    modify the file locations here to restore the database.
*****************************************************************************/


SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
GO

EXEC sp_configure 'show advanced options', 1;
GO
RECONFIGURE
GO

EXEC sp_configure 'max degree of parallelism', 4;
GO

EXEC sp_configure 'cost threshold for parallelism', 100
GO

EXEC sp_configure 'max server memory (MB)', 7000;
GO

RECONFIGURE
GO

/****************************************************
Restore database
****************************************************/
use master;
GO

IF DB_ID('BabbyNames201711') IS NOT NULL
BEGIN
    IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE'
    ALTER DATABASE BabbyNames201711
        SET SINGLE_USER
        WITH ROLLBACK IMMEDIATE;
END
GO

/* What can I say, I like to carefully lay out my files.*/
RESTORE DATABASE BabbyNames201711
    FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
    DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
    WITH
        MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf',
		MOVE 'fg_FirstNameByBirthDate_f1' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf',
		MOVE 'fg_FirstNameByBirthDate_f2' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf',
		MOVE 'fg_FirstNameByBirthDate_f3' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf',
		MOVE 'fg_FirstNameByBirthDate_f4' TO 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf',
        MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf',
		MOVE 'MemoryOptimizedData' TO 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData',
        REPLACE,
        RECOVERY;
GO

## 02-aggregator.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/

RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO


USE BabbyNames201711;
GO
SET NOCOUNT ON;
GO

/********************************************************/
/* Problem Query: The aggregator                        */
/********************************************************/


/* Run these with actual plans enabled.
Compare the estimated cost (CTP is set at 100)
Compare the query time stats
Show why the columnstore index is drunk here.
*/
SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM dbo.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC;
GO

SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC;
GO


/* OK, let's just not use the columnstore.
Let's use the partitioned rowstore index on BirthYear.
In my version/compat level, I don't get batch mode with this hint.

Are we getting partition elimination or segment elimination?
*/
SET STATISTICS IO, TIME ON;
GO
SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX);
GO
SET STATISTICS IO, TIME OFF;
GO


/* It's still slower!

Example runtimes...

Non-partitioned rowstore index on BirthYear:			CPU time =  5,157 ms, elapsed time =  1,662 ms.
Partitioned rowstore index on BirthYear:				CPU time = 10,547 ms, elapsed time =  2,939 ms.
Partitioned columnstore index re-computing BirthYear:   CPU time = 17,563 ms, elapsed time = 18,094 ms.
*/

/* Head back to the slides to explain why the partitioned index is different */


/*************************************************************************
Fixes
*************************************************************************/

/***************************
FIX 1 - NONALIGNED INDEX
***************************/

/* We can create a "non-aligned", non-partitioned index on our partitioned
table. Just specify a filegroup rather than a partition scheme */
/* I'm giving it a short (terrible) name just to make it easy to identify in the execution plan */
/* This takes ~2.5 minutes */
CREATE INDEX nonaligned
	on pt.FirstNameByBirthDate (BirthYear)
	WITH (SORT_IN_TEMPDB = ON)
	ON [fg_FirstNameByBirthDate];
GO


/* Does SQL Server want to use 'nonaligned'? */
SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
GO

SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE BirthYear BETWEEN 2001 and 2015
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
    OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX);
GO

/*
So in our case we have to add a hint (due to that pesky computed column!)
And we've lost the ability to do partition level operations:
	switch any partition in
	switch any partition out
	truncate any partition

We have to drop or disable all non-aligned indexes to do ANY partition level operation.
*/

TRUNCATE TABLE pt.FirstNameByBirthDate
    WITH (PARTITIONS (1 TO 4));
GO

DROP INDEX IF EXISTS nonaligned
    ON pt.FirstNameByBirthDate;
GO

/***************************
FIX 1 - NOT SO GREAT.
***************************/


/***************************
FIX 2 - QUERY REWRITE TO GET
PARTION ELIMINATION
***************************/


/* We can rewrite the query to get partition elimination.
We're still going to have to do the hash aggregate, but we'll
do it for fewer partitions */

/* Run with actual plans on.
Look at how many actual partitions were used in the plan.
Confirm the number of rowgroups read in stats io output*/
SET STATISTICS IO, TIME ON;
GO
SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE
    FakeBirthDateStamp >= CAST('2001-01-01' AS DATETIME2(0)) and
    FakeBirthDateStamp < CAST('2016-01-01' AS DATETIME2(0))
GROUP BY BirthYear
ORDER BY COUNT(*) DESC;
GO
SET STATISTICS IO, TIME OFF;
GO


/* It's still slower! Look at the plan to see why...

Rewritten query / partitioned columnstore index:		CPU time = 5,734 ms,  elapsed time =  5,894 ms.

Non-partitioned rowstore index on BirthYear:			CPU time =  5,157 ms, elapsed time =  1,662 ms.
Partitioned rowstore index on BirthYear:				CPU time = 10,547 ms, elapsed time =  2,939 ms.
Partitioned columnstore index re-computing BirthYear:   CPU time = 17,563 ms, elapsed time = 18,094 ms.
*/


/* We're leveraging the partitioning key now, so we are getting partition elimination.
However, this is still getting costed very low, and it's going single threaded.
It seems to be under-costing the work that compute scalar's doing to re-calc BirthYear to me.
*/


/* We could try to force parallelism using undocumented TF 8649.

Or on SQL Server 2016 SP1 CU2+ and SQL Server 2017, we could use undocumented hint ENABLE_PARALLEL_PLAN_PREFERENCE
http://www.queryprocessor.com/how-to-get-a-parallel-plan/  (Dmitry Pilugin)
*/

/* Notice that it's not in this list... */
SELECT * FROM sys.dm_exec_valid_use_hints;
GO


SET STATISTICS IO, TIME ON;
GO
SELECT
	BirthYear,
	COUNT(*) as NameCount
FROM pt.FirstNameByBirthDate
WHERE
    FakeBirthDateStamp >= CAST('2001-01-01' AS DATETIME2(0)) and
    FakeBirthDateStamp < CAST('2016-01-01' AS DATETIME2(0))
GROUP BY BirthYear
ORDER BY COUNT(*) DESC
	OPTION (USE HINT ('ENABLE_PARALLEL_PLAN_PREFERENCE'))
GO
SET STATISTICS IO, TIME OFF;
GO


/* It's still slower! Look at the plan to see why...

Rewritten query / partitioned columnstore index:		CPU time = 5,734 ms,  elapsed time =  5,894 ms.
Rewritten query with hint forcing parallelism:			CPU time = 7,188 ms,  elapsed time =  1,907 ms.

Non-partitioned rowstore index on BirthYear:			CPU time =  5,157 ms, elapsed time =  1,662 ms.
Partitioned rowstore index on BirthYear:				CPU time = 10,547 ms, elapsed time =  2,939 ms.
Partitioned columnstore index re-computing BirthYear:   CPU time = 17,563 ms, elapsed time = 18,094 ms.
*/

## 03-Min_Max.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/

RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO


USE BabbyNames201711;
GO

/********************************************************/
/* Problem: MIN / MAX get slow                          */
/********************************************************/


/* Run these with actual plans enabled.
Note the differences between the plans.
It knows the query against the partitioned table will be more expensive.
    Look at elapsed time in each plan.
    Query 1: look at the seek operator and explain why that was fast.
    Query 2: How many partitions were actually used?
*/
/* Why is the second query slower? */

SET STATISTICS IO, TIME ON;
GO
SELECT MAX(FirstNameId) AS max_val
FROM dbo.FirstNameByBirthDate
GO

SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate with (index (pt_FirstNameByBirthDate_FirstNameId))
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX)
GO

EXEC sp_helpindex 'pt.FirstNameByBirthDate'

SET STATISTICS IO, TIME OFF;
GO


/*
Non-partitioned rowstore index on FirstNameId:			CPU time =      0 ms,  elapsed time =     0 ms.
Partitioned rowstore index on FirstNameId:				CPU time = 26,530 ms,  elapsed time = 8,110 ms.
*/


/* MIN has the same issue (although of course a different resulting value) */
SELECT MIN(FirstNameId) AS min_val
FROM pt.FirstNameByBirthDate
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX)
GO


/* To the slides, to talk this through! */


/*************************************************************************
Fixes
*************************************************************************/

/***************************
FIX 1 - Non-Aligned Index
***************************/

/* We can create a "non-aligned", non-partitioned index on our partitioned
table. Just specify a filegroup rather than a partition scheme */
/* This takes ~2 minutes to create */
CREATE INDEX nonaligned
	on pt.FirstNameByBirthDate (FirstNameId)
	WITH (SORT_IN_TEMPDB = ON)
	ON [fg_FirstNameByBirthDate];
GO


/* Now we get the "non-partitioned" plan .... */
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate;
GO

/* But we've lost the ability to do partition level operations:
	switch any partition in
	switch any partition out
	truncate any partition

We have to drop or disable all non-aligned indexes to do ANY partition level operation.
*/

TRUNCATE TABLE pt.FirstNameByBirthDate
    WITH (PARTITIONS (1 TO 4));
GO

DROP INDEX IF EXISTS nonaligned
    ON pt.FirstNameByBirthDate;
GO


/***************************
FIX 2 - Query rewrite
***************************/
/* OK, let's practice the recommended workaround from the Connect Item */

/* Our solution relies on the built-in $partition function.
This computes which partition data is in.
Here's a simple example using it for just one row. */
SELECT $partition.pf_fnbd(FakeBirthDateStamp) as partition_number,
	FakeBirthDateStamp,
    FirstNameByBirthDateId,
    BirthYear,
    FirstNameId,
    Gender
FROM pt.FirstNameByBirthDate
WHERE FirstNameByBirthDateId = 12225838
GO


/* Testing out the pattern in
https://connect.microsoft.com/SQLServer/feedback/details/240968/partition-table-using-min-max-functions-and-top-n-index-selection-and-performance */
/* We're just looking at partition #s 41, 42, 43 here */
/* We can use the function in interesting ways.
Look at the plan for this query... it does a very efficient backward scan in each partition */
SELECT MAX(max_val)
FROM
( VALUES (41), (42), (43) ) as partitiontable(num) /* 3 row table from table value constructor */
CROSS APPLY
     (SELECT MAX(FirstNameId) as max_val
        FROM pt.FirstNameByBirthDate
        /* CROSS APPLY lets us join to the table value constructor in here */
        WHERE $partition.pf_fnbd(FakeBirthDateStamp) = partitiontable.num
	) AS o;
GO


/* What if we have a changing number of partitions over time? */

/* We can use the partition function "fanout" and a numbers
table to construct a table with one row for each partition number, like this */
SELECT n.Num
FROM sys.partition_functions AS pf
JOIN ref.Numbers as n on n.Num <= pf.fanout
WHERE
	pf.name='pf_fnbd';
GO


/* So we can get an automatic peek into each partition which has rows using this... */
/* Note: joining to a system table prevents parallelism, so I'm doing this as a two-step
	query and putting the value for @fanout into a variable */
SET STATISTICS IO, TIME ON;
GO

DECLARE @fanout int
SELECT @fanout = fanout
FROM sys.partition_functions pf
WHERE pf.name='pf_fnbd';

SELECT MAX(max_val)AS max_val
FROM (
	SELECT Num
	FROM ref.Numbers
	WHERE Num <= @fanout
	) as partitiontable(num)
CROSS APPLY
     (select MAX(FirstNameId) as max_val
        from pt.FirstNameByBirthDate
        where $partition.pf_fnbd(FakeBirthDateStamp) = partitiontable.num
	) as o;
GO

SET STATISTICS IO, TIME OFF;
GO


/*
Rewritten query for partitioned index on FirstNameId	CPU time =     31 ms,  elapsed time =     8 ms.

Non-partitioned rowstore index on FirstNameId:			CPU time =      0 ms,  elapsed time =     0 ms.
Partitioned rowstore index on FirstNameId:				CPU time = 26,530 ms,  elapsed time = 8,110 ms.
*/


/* The TSQL is not pretty.
Don't like it?
Vote up the bug!
https://connect.microsoft.com/SQLServer/feedback/details/240968/partition-table-using-min-max-functions-and-top-n-index-selection-and-performance
*/


/***************************
FIX 3 - Columnstore
***************************/

SET STATISTICS IO, TIME ON;
GO
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate;
GO
SET STATISTICS IO, TIME OFF;
GO


/* Enable actual plan.
Look at "Locally Aggregated Rows" on properties of scan.
Bonus: look at the number of rows coming out of the Columnstore Index Scan.
		Talk about why that makes sense (it does!)
*/
SELECT MAX(FirstNameId) AS max_val
FROM pt.FirstNameByBirthDate;
GO


/*
Partitioned columnstore									CPU time =    406 ms,  elapsed time =   407 ms.
Rewritten query for partitioned index on FirstNameId	CPU time =     31 ms,  elapsed time =     8 ms.

Non-partitioned rowstore index on FirstNameId:			CPU time =      0 ms,  elapsed time =     0 ms.
Partitioned rowstore index on FirstNameId:				CPU time = 26,530 ms,  elapsed time = 8,110 ms.
*/

## 04-Blocked.sql
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
*****************************************************************************/

RAISERROR ('Did you mean to run the whole thing?', 20, 1) WITH LOG;
GO

/********************************************************/
/* Problem: Unexpected blocking                         */
/********************************************************/


/* Start this query in another session.
This will take out a lock on the partition holding 2015 data */
USE BabbyNames201711;
GO
BEGIN TRAN
DECLARE @updateval DATETIME2(0)='2010-01-01 03:49:00'

	UPDATE pt.FirstNameByBirthDate
	SET Flag1 = 1
	WHERE FakeBirthDateStamp=@updateval
	and FirstNameId = 67092;


/* This query should only need to read from one partition.
Start it up in this session. */
USE BabbyNames201711;
GO
SELECT COUNT(*)
FROM pt.FirstNameByBirthDate
WHERE FakeBirthDateStamp >= '2000-01-01 00:00:00.0'
	AND FakeBirthDateStamp < '2000-01-02 00:00:00.0';
GO


/* Confirm that it is blocked in a third session.
sp_WhoIsActive is a free procedure written by Adam Machanic.
Get it at WhoIsActive.com
Why is it blocked?
*/
exec sp_WhoIsActive @get_plans=1;
GO

/* Look at the predicate on the columnstore operator in the plan
    There's a convert_implicit
    And there's @1 and @1

Cancel the blocked query
*/


/* This is a simple query, so SQL Server is automatically parameterizing those dates.
It sees it needs to make them a DATETIME2, but it defaults to making them DATETIME2(7)
The column in the table is a DATETIME2(0).

DATETIME2(7) has a higher precedence than DATETIME2(0)
SQL Server has to up-sample EVERY SINGLE ROW in the table to DATETIME2(7) to compare values.
*/


/* Get an estimated plan for this corrected query.
	Look at the seek predicate which magically appeared.
Then run the query and look at the actual plan /
    actual partitions accessed.                         */
SELECT COUNT(*)
FROM pt.FirstNameByBirthDate
WHERE FakeBirthDateStamp > CAST('2000-01-01 00:00:00.0' AS DATETIME2(0))
	AND FakeBirthDateStamp <= CAST('2000-01-02 00:00:00.0' AS DATETIME2(0))
GO


/* This also works fine */
DECLARE @FakeBirthDateStampStart DATETIME2(0) = '2000-01-01 00:00:00.0',
	@FakeBirthDateStampEnd DATETIME2(0) = '2000-01-02 00:00:00.0';

SELECT COUNT(*)
FROM pt.FirstNameByBirthDate
WHERE FakeBirthDateStamp > @FakeBirthDateStampStart
	AND FakeBirthDateStamp <= @FakeBirthDateStampEnd
GO
	/*****************************************************************************
	Copyright (c) 2017 SQL Workbooks LLC
	Terms of Use: https://sqlworkbooks.com/terms-of-service/
	Contact: help@sqlworkbooks.com

	Setup:
	Download BabbyNames.bak.zip (41 MB zipped database backup)
	https://github.com/LitKnd/BabbyNames/releases/tag/v1.1

	Then review and run the script below on a SQL Server 2017 dedicated test instance
	Developer Edition recommended (Enteprise and Evaluation Editions will work too)

	The script
	Restores the database (edit the file locations for your instance)
	Expands and modifies the data
	21GB data files (multiple files in a couple of filegroups)
	3GB log file
	Duration on my test instance (4 vCPUs, SSDs): ~28 minutes

	This requires some tempdb space to do sorts, etc. I ran with 4x 1GB tempdb files.

	*****************************************************************************/

	SET XACT_ABORT, NOCOUNT, ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING ON;
	GO

	EXEC sp_configure 'show advanced options', 1;
	GO
	RECONFIGURE
	GO

	EXEC sp_configure 'max degree of parallelism', 4;
	GO

	EXEC sp_configure 'cost threshold for parallelism', 100
	GO

	EXEC sp_configure 'max server memory (MB)', 7000;
	GO

	RECONFIGURE
	GO


	/****************************************************
	Restore small BabbyNames database
	****************************************************/
	use master;
	GO

	IF DB_ID('BabbyNames201711') IS NOT NULL
	BEGIN
	IF (SELECT state_desc FROM sys.databases WHERE name='BabbyNames201711') = 'ONLINE'
	ALTER DATABASE BabbyNames201711
	SET SINGLE_USER
	WITH ROLLBACK IMMEDIATE;
	END
	GO

	RESTORE DATABASE BabbyNames201711
	FROM DISK=N'S:\MSSQL\Backup\BabbyNames.bak'
	WITH
	MOVE 'BabbyNames' TO 'S:\MSSQL\DATA\BabbyNames201711.mdf',
	MOVE 'BabbyNames_log' TO 'S:\MSSQL\DATA\BabbyNames201711_log.ldf',
	REPLACE,
	RECOVERY;
	GO

	ALTER DATABASE BabbyNames201711 SET RECOVERY SIMPLE;
	GO


	ALTER DATABASE BabbyNames201711
	ADD FILEGROUP fg_FirstNameByBirthDate;
	GO


	ALTER DATABASE BabbyNames201711 ADD FILE
	( NAME = fg_FirstNameByBirthDate_f1,
	FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f1.ndf',
	SIZE = 5GB,
	FILEGROWTH = 512MB
	) TO FILEGROUP fg_FirstNameByBirthDate;
	GO

	ALTER DATABASE BabbyNames201711 ADD FILE
	( NAME = fg_FirstNameByBirthDate_f2,
	FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f2.ndf',
	SIZE = 5GB,
	FILEGROWTH = 512MB
	) TO FILEGROUP fg_FirstNameByBirthDate;
	GO

	ALTER DATABASE BabbyNames201711 ADD FILE
	( NAME = fg_FirstNameByBirthDate_f3,
	FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f3.ndf',
	SIZE = 5GB,
	FILEGROWTH = 512MB
	) TO FILEGROUP fg_FirstNameByBirthDate;
	GO

	ALTER DATABASE BabbyNames201711 ADD FILE
	( NAME = fg_FirstNameByBirthDate_f4,
	FILENAME = 'S:\MSSQL\Data\BabbyNames201711_fg_FirstNameByBirthDate_f4.ndf',
	SIZE = 5GB,
	FILEGROWTH = 512MB
	) TO FILEGROUP fg_FirstNameByBirthDate;
	GO

	/* Make fg_FirstNameByBirthDate the default filegroup */
	ALTER DATABASE BabbyNames201711 MODIFY FILEGROUP fg_FirstNameByBirthDate DEFAULT;
	GO

	ALTER DATABASE BabbyNames201711 MODIFY FILE (NAME='BabbyNames_log', SIZE=3GB, FILEGROWTH=512MB);
	GO

	/* just in case you want to play around with Hekaton... */
	ALTER DATABASE BabbyNames201711 ADD FILEGROUP MemoryOptimizedData CONTAINS MEMORY_OPTIMIZED_DATA;
	GO

	ALTER DATABASE BabbyNames201711
	ADD FILE( NAME = 'MemoryOptimizedData' , FILENAME = 'S:\MSSQL\Data\BabbyNames201711_MemoryOptimizedData')
	TO FILEGROUP MemoryOptimizedData;
	GO


	/****************************************************
	Configure database and expand data
	****************************************************/

	SET STATISTICS IO, TIME OFF;
	GO
	SET XACT_ABORT, NOCOUNT ON;
	GO
	SET ANSI_NULLS, QUOTED_IDENTIFIER, ANSI_PADDING, ANSI_WARNINGS, ARITHABORT, CONCAT_NULL_YIELDS_NULL ON;
	GO
	SET NUMERIC_ROUNDABORT OFF;
	GO


	ALTER DATABASE BabbyNames201711 SET QUERY_STORE = ON
	GO
	ALTER DATABASE BabbyNames201711 SET QUERY_STORE
	(OPERATION_MODE = READ_WRITE,
	CLEANUP_POLICY = (STALE_QUERY_THRESHOLD_DAYS = 365), MAX_STORAGE_SIZE_MB = 1024
	)
	GO


	ALTER DATABASE BabbyNames201711 SET COMPATIBILITY_LEVEL=140;
	GO

	ALTER DATABASE BabbyNames201711 SET TARGET_RECOVERY_TIME = 60 SECONDS;
	GO

	USE BabbyNames201711;
	GO

	EXEC evt.logme N'Restored small BabbyNames201711 database';
	GO

	ALTER DATABASE SCOPED CONFIGURATION SET QUERY_OPTIMIZER_HOTFIXES = OFF;
	GO

	ALTER DATABASE SCOPED CONFIGURATION SET LEGACY_CARDINALITY_ESTIMATION = OFF;
	GO



	/******************************************************/
	/* ref.Numbers */
	/******************************************************/

	/* Create ref.Numbers. This is a helper "numbers" table just to help us in the next step.*/
	IF SCHEMA_ID('ref') IS NULL
	BEGIN
	EXEC evt.logme N'Create schema ref.';

	EXEC ('CREATE SCHEMA ref AUTHORIZATION dbo');
	END
	GO

	EXEC evt.logme N'Create ref.Numbers.';
	GO
	IF OBJECT_ID('ref.Numbers','U') IS NOT NULL
	BEGIN
	EXEC evt.logme N'Table ref.Numbers already exists, dropping.';

	DROP TABLE ref.Numbers;
	END
	GO

	CREATE TABLE ref.Numbers (
	Num INT NOT NULL,
	) on fg_FirstNameByBirthDate;
	GO

	EXEC evt.logme N'Load ref.Numbers.';
	GO
	INSERT ref.Numbers
	(Num)
	SELECT TOP 10000000
	ROW_NUMBER() OVER (ORDER BY fn1.ReportYear)
	FROM agg.FirstNameByYear AS fn1
	CROSS JOIN agg.FirstNameByYear AS fn2;
	GO

	EXEC evt.logme N'Index and key ref.Numbers.';
	GO

	CREATE CLUSTERED COLUMNSTORE INDEX ccx_ref_Numbers on ref.Numbers;
	GO

	ALTER TABLE ref.Numbers
	ADD CONSTRAINT pk_refNumbers_Num
	PRIMARY KEY NONCLUSTERED (Num)
	ON fg_FirstNameByBirthDate;
	GO


	/******************************************************/
	/* Helper index */
	/******************************************************/

	EXEC evt.logme N'Create nccx_halp ON agg.FirstNameByYear.';
	GO

	CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_halp on agg.FirstNameByYear
	(FirstNameId, ReportYear, NameCount, Gender)
	ON fg_FirstNameByBirthDate
	GO



	/******************************************************/
	/* Create and load dbo.FirstNameByBirthDate */
	/******************************************************/

	EXEC evt.logme N'Load dbo.FirstNameByBirthDateStage';
	GO

	/* This select into gets parallel insert plan, 20170906.
	We set the default filegroup above so this will go into fg_FirstNameByBirthDate,
	I am just using the hint added in 2017 for future proofing. */
	SELECT
	ISNULL(CAST(ROW_NUMBER() OVER (ORDER BY (SELECT 'boop')) AS BIGINT),0) AS FirstNameByBirthDateId,
	DATEADD(mi,n.Num * 5.1,CAST('1/1/' + CAST(ReportYear AS CHAR(4)) AS datetime2(0))) as FakeBirthDateStamp,
	fn.StateCode,
	fn.FirstNameId,
	Gender,
	CAST(NULL AS TINYINT) as Flag1,
	CAST(NULL AS CHAR(1)) as Flag2
	INTO dbo.FirstNameByBirthDateStage ON fg_FirstNameByBirthDate /* This added in SQL Server 2017*/
	FROM agg.FirstNameByYearState AS fn
	CROSS APPLY (select Num from ref.Numbers where Num <= fn.NameCount) AS n
	WHERE fn.ReportYear >= 1970 /* Limit size of dataset here */
	OPTION (RECOMPILE);
	GO

	EXEC evt.logme N'Add BirthYear computed column to dbo.FirstNameByBirthDateStage';
	GO

	ALTER TABLE dbo.FirstNameByBirthDateStage
	ADD BirthYear as YEAR(FakeBirthDateStamp);
	GO

	EXEC evt.logme N'Create dbo.FirstNameByBirthDate, which has an identity property';
	GO


	CREATE TABLE dbo.FirstNameByBirthDate (
	FirstNameByBirthDateId BIGINT IDENTITY(1,1),
	FakeBirthDateStamp DATETIME2(0),
	StateCode CHAR(2) NOT NULL,
	FirstNameId INT NOT NULL,
	Gender CHAR(1) NOT NULL,
	Flag1 TINYINT NULL,
	Flag2 CHAR(1) NULL,
	BirthYear AS YEAR(FakeBirthDateStamp)
	) ON fg_FirstNameByBirthDate
	GO



	EXEC evt.logme N'Switch data from dbo.FirstNameByBirthDateStage to dbo.FirstNameByBirthDate';
	GO

	ALTER TABLE dbo.FirstNameByBirthDateStage SWITCH TO dbo.FirstNameByBirthDate;
	GO


	EXEC evt.logme N'DBCC CHECKIDENT RESEED for dbo.FirstNameByBirthDateStage';
	GO

	DBCC CHECKIDENT ('dbo.FirstNameByBirthDate', RESEED);
	GO


	EXEC evt.logme N'DROP TABLE FirstNameByBirthDateStage';
	GO
	DROP TABLE FirstNameByBirthDateStage;
	GO

	EXEC evt.logme N'Create clustered PK on dbo.FirstNameByBirthDate';
	GO

	ALTER TABLE dbo.FirstNameByBirthDate
	ADD CONSTRAINT pk_FirstNameByBirthDate_FirstNameByBirthDateId
	PRIMARY KEY CLUSTERED (FirstNameByBirthDateId)
	WITH (SORT_IN_TEMPDB = ON,
	DATA_COMPRESSION = ROW)
	ON fg_FirstNameByBirthDate;
	GO

	EXEC evt.logme N'Clean up nccx_halp ON agg.FirstNameByYear.';
	GO
	DROP INDEX IF EXISTS nccx_halp ON agg.FirstNameByYear;
	GO



	/******************************************************/
	/* Partitioned table */
	/******************************************************/
	EXEC evt.logme N'CREATE SCHEMA pt AUTHORIZATION dbo';
	GO
	CREATE SCHEMA pt AUTHORIZATION dbo;
	GO

	EXEC evt.logme N'CREATE PARTITION FUNCTION pf_fnbd from 1879 to 2020.';
	GO
	declare @dsql nvarchar(max)=N'
	CREATE PARTITION FUNCTION pf_fnbd (DATETIME2(0))
	AS RANGE RIGHT
	FOR VALUES ( ' + (
	SELECT STUFF(
	(SELECT ', ''' + CAST(Num as CHAR(4)) + '-01-01'''
	FROM ref.Numbers
	WHERE Num between 1959 and 2020
	FOR XML PATH(''))
	,1,1,'')
	)+N'
	);'

	EXEC (@dsql);
	GO

	EXEC evt.logme N'CREATE PARTITION SCHEME ps_fnbd.';
	GO
	CREATE PARTITION SCHEME ps_fnbd
	AS PARTITION pf_fnbd
	ALL TO (fg_FirstNameByBirthDate);
	GO

	EXEC evt.logme N'CREATE table pt.FirstNameByBirthDate';
	GO
	CREATE TABLE pt.FirstNameByBirthDate (
	FakeBirthDateStamp DATETIME2(0) NOT NULL,
	FirstNameByBirthDateId BIGINT IDENTITY NOT NULL,
	StateCode CHAR(2) NOT NULL,
	FirstNameId INT NULL,
	Gender CHAR(1) NULL,
	Flag1 TINYINT NULL,
	Flag2 CHAR(1) NULL,
	BirthYear AS YEAR(FakeBirthDateStamp)
	) ON ps_fnbd (FakeBirthDateStamp)
	GO


	EXEC evt.logme N'Insert data from dbo.FirstNameByBirthDate';
	GO
	SET IDENTITY_INSERT pt.FirstNameByBirthDate ON;
	GO


	INSERT pt.FirstNameByBirthDate WITH (TABLOCK)
	(FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2)
	SELECT FirstNameByBirthDateId, FakeBirthDateStamp, StateCode, FirstNameId, Gender, Flag1, Flag2
	FROM dbo.FirstNameByBirthDate
	LEFT JOIN ref.Numbers as n on 1 = -1 /* pointless join to get batch mode on sort operator */
	;
	GO

	SET IDENTITY_INSERT pt.FirstNameByBirthDate OFF;
	GO


	EXEC evt.logme N'CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate';
	GO
	CREATE UNIQUE CLUSTERED INDEX cx_pt_FirstNameByBirthDate ON
	pt.FirstNameByBirthDate (FirstNameByBirthDateId, FakeBirthDateStamp)
	WITH (SORT_IN_TEMPDB = ON,
	DATA_COMPRESSION = ROW)
	ON ps_fnbd (FakeBirthDateStamp);
	GO


	EXEC evt.logme N'DBCC CHECKIDENT RESEED for pt.FirstNameByBirthDateStage';
	GO

	DBCC CHECKIDENT ('pt.FirstNameByBirthDate', RESEED);
	GO



	/******************************************************/
	/* Foreign key constraints */
	/******************************************************/

	EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_FirstNameId';
	GO

	ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK
	ADD CONSTRAINT FK_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId)
	REFERENCES ref.FirstName (FirstNameId);
	GO


	EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_FirstNameId';
	GO

	ALTER TABLE pt.FirstNameByBirthDate WITH CHECK
	ADD CONSTRAINT FK_pt_FirstNameByBirthDate_FirstNameId FOREIGN KEY (FirstNameId)
	REFERENCES ref.FirstName (FirstNameId);
	GO

	EXEC evt.logme N'Create FK FK_FirstNameByBirthDate_StateCode';
	GO

	ALTER TABLE dbo.FirstNameByBirthDate WITH CHECK
	ADD CONSTRAINT FK_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode)
	REFERENCES ref.State (StateCode);
	GO

	EXEC evt.logme N'Create FK FK_pt_FirstNameByBirthDate_StateCode';
	GO

	ALTER TABLE pt.FirstNameByBirthDate WITH CHECK
	ADD CONSTRAINT FK_pt_FirstNameByBirthDate_StateCode FOREIGN KEY (StateCode)
	REFERENCES ref.State (StateCode);
	GO


	/******************************************************/
	/* Indexes */
	/******************************************************/

	CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_BirthYear
	ON dbo.FirstNameByBirthDate ( BirthYear ASC );
	GO

	CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_BirthYear
	ON pt.FirstNameByBirthDate ( BirthYear ASC );
	GO

	CREATE NONCLUSTERED INDEX dbo_FirstNameByBirthDate_FirstNameId
	ON dbo.FirstNameByBirthDate ( FirstNameId ASC );
	GO

	CREATE NONCLUSTERED INDEX pt_FirstNameByBirthDate_FirstNameId
	ON pt.FirstNameByBirthDate ( FirstNameId ASC );
	GO

	CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_pt_FirstNameByBirthDate
	ON pt.FirstNameByBirthDate�
	( FakeBirthDateStamp, FirstNameByBirthDateId, StateCode, FirstNameId, Gender, Flag1, Flag2);
	GO


	/******************************************************/
	/* Backup */
	/******************************************************/

	EXEC evt.logme N'Run a full backup';
	GO


	BACKUP DATABASE BabbyNames201711
	TO DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
	DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
	DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
	DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
	WITH INIT, COMPRESSION, STATS=5;
	GO


	/* Restore command (for reference) */
	--use master;
	--GO

	--IF DB_ID('BabbyNames201711') IS NOT NULL
	--BEGIN
	-- ALTER DATABASE BabbyNames201711
	-- SET SINGLE_USER
	-- WITH ROLLBACK IMMEDIATE;
	--END
	--GO

	--RESTORE DATABASE BabbyNames201711
	-- FROM DISK=N'S:\MSSQL\Backup\BabbyNames201711_1of4.bak',
	-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_2of4.bak',
	-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_3of4.bak',
	-- DISK=N'S:\MSSQL\Backup\BabbyNames201711_4of4.bak'
	-- WITH
	-- MOVE 'BabbyNames' TO 'S:\MSSQL\Data\BabbyNames201711.mdf',
	-- MOVE 'BabbyNames_log' TO 'S:\MSSQL\Data\BabbyNames201711_log.ldf',
	-- REPLACE,
	-- RECOVERY;
	--GO



	/******************************************************/
	/* All done */
	/******************************************************/

	EXEC evt.logme N'BEEP BOOP WE ARE DONE';
	GO