Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save LitKnd/157177799d4fe4e5ad7ad78d53afbfc5 to your computer and use it in GitHub Desktop.
Save LitKnd/157177799d4fe4e5ad7ad78d53afbfc5 to your computer and use it in GitHub Desktop.
/*****************************************************************************
Copyright (c) 2017 SQL Workbooks LLC
Terms of Use: https://sqlworkbooks.com/terms-of-service/
Contact: help@sqlworkbooks.com
Setup:
Download the database to restore from https://github.com/LitKnd/BabbyNames/releases/tag/v1.1
Either the large or small database works in this case.
(If you don't have the big one yet, use BabbyNames.bak.zip, it's only 41MB to download.)
You must restore to SQL Server 2016 or a higher version.
*****************************************************************************/
/* Restore the database */
use master;
GO
IF DB_ID('BabbyNames') IS NOT NULL
BEGIN
ALTER DATABASE BabbyNames SET SINGLE_USER WITH ROLLBACK IMMEDIATE;
END
GO
RESTORE DATABASE BabbyNames FROM
DISK = N'S:\MSSQL\Backup\BabbyNames.bak'
WITH REPLACE;
GO
use BabbyNames;
GO
/* We have some indexes on agg.FirstNameByYear */
IF 0 = (SELECT COUNT(*) FROM
sys.indexes
WHERE name='ix_FirstNameByYearState_FirstNameId_StateCode_Gender_ReportYear_INCLUDES')
CREATE INDEX ix_FirstNameByYearState_FirstNameId_StateCode_Gender_ReportYear_INCLUDES
ON agg.FirstNameByYearState
(FirstNameId, StateCode, Gender, ReportYear)
INCLUDE (NameCount)
GO
/* Create this procedure.
I'm not saying this is great code. It was written purposefully to have some issues :) */
DROP PROCEDURE IF EXISTS dbo.PopularNames
GO
CREATE PROCEDURE dbo.PopularNames
@Threshold INT = NULL
AS
with RunningTotal AS (
SELECT
fnby.FirstNameId,
fnby.StateCode,
fnby.Gender,
ReportYear,
SUM(NameCount) OVER (PARTITION BY fnby.FirstNameId, StateCode, Gender ORDER BY fnby.ReportYear) as TotalNamed
FROM agg.FirstNameByYearState as fnby
),
RunningTotalPlusLag AS (
SELECT
FirstNameId,
StateCode,
Gender,
ReportYear,
TotalNamed,
LAG(TotalNamed, 1, 0) OVER (PARTITION BY FirstNameId, StateCode, Gender ORDER BY ReportYear) AS TotalNamedPriorYear
FROM RunningTotal
)
SELECT
fn.FirstName,
RunningTotalPlusLag.StateCode,
RunningTotalPlusLag.Gender,
RunningTotalPlusLag.ReportYear,
RunningTotalPlusLag.TotalNamed,
RunningTotalPlusLag.TotalNamedPriorYear
FROM RunningTotalPlusLag
JOIN ref.FirstName as fn on
RunningTotalPlusLag.FirstNameId=fn.FirstNameId
WHERE
(@Threshold is NULL
and TotalNamed >= 100
and (TotalNamedPriorYear < 100 OR TotalNamedPriorYear IS NULL)
)
OR
(TotalNamed >= @Threshold
and (TotalNamedPriorYear < @Threshold OR TotalNamedPriorYear IS NULL)
)
ORDER BY ReportYear DESC, StateCode;
GO
SET STATISTICS TIME, IO ON;
GO
/*****************************************************
Run this with actual execution plans on. Save the plan.
*****************************************************/
EXEC dbo.PopularNames @Threshold = 100000
GO
/* Even when the data is in memory, this takes about a minute:
(191 row(s) affected)
Table 'FirstName'. Scan count 0, logical reads 382, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'Worktable'. Scan count 6052350, logical reads 34458102, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'FirstNameByYearState'. Scan count 1, logical reads 14960, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 58296 ms, elapsed time = 60087 ms.
*/
/* Now create this columnstore index. Yes, this is goofy. */
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_agg_FirstNameByYearState
ON agg.FirstNameByYearState
(StateCode);
GO
/* Run this with actual execution plans on. Save the plan and compare */
EXEC dbo.PopularNames @Threshold = 100000
GO
/*
(191 row(s) affected)
Table 'Worktable'. Scan count 0, logical reads 0, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'FirstName'. Scan count 0, logical reads 382, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
Table 'FirstNameByYearState'. Scan count 1, logical reads 14960, physical reads 0, read-ahead reads 0, lob logical reads 0, lob physical reads 0, lob read-ahead reads 0.
SQL Server Execution Times:
CPU time = 2157 ms, elapsed time = 2164 ms.
*/
/* Why is it so much faster even though it's NOT USING THE COLUMNSTORE INDEX? */
/* The query isn't using the columnstore index. What if we use the same query, but
add a hint to make sure it CAN'T use the columnstore? */
DROP PROCEDURE IF EXISTS dbo.PopularNamesWithAHint
GO
CREATE PROCEDURE dbo.PopularNamesWithAHint
@Threshold INT = NULL
AS
with RunningTotal AS (
SELECT
fnby.FirstNameId,
fnby.StateCode,
fnby.Gender,
ReportYear,
SUM(NameCount) OVER (PARTITION BY fnby.FirstNameId, StateCode, Gender ORDER BY fnby.ReportYear) as TotalNamed
FROM agg.FirstNameByYearState as fnby
),
RunningTotalPlusLag AS (
SELECT
FirstNameId,
StateCode,
Gender,
ReportYear,
TotalNamed,
LAG(TotalNamed, 1, 0) OVER (PARTITION BY FirstNameId, StateCode, Gender ORDER BY ReportYear) AS TotalNamedPriorYear
FROM RunningTotal
)
SELECT
fn.FirstName,
RunningTotalPlusLag.StateCode,
RunningTotalPlusLag.Gender,
RunningTotalPlusLag.ReportYear,
RunningTotalPlusLag.TotalNamed,
RunningTotalPlusLag.TotalNamedPriorYear
FROM RunningTotalPlusLag
JOIN ref.FirstName as fn on
RunningTotalPlusLag.FirstNameId=fn.FirstNameId
WHERE
(@Threshold is NULL
and TotalNamed >= 100
and (TotalNamedPriorYear < 100 OR TotalNamedPriorYear IS NULL)
)
OR
(TotalNamed >= @Threshold
and (TotalNamedPriorYear < @Threshold OR TotalNamedPriorYear IS NULL)
)
ORDER BY ReportYear DESC, StateCode
OPTION (IGNORE_NONCLUSTERED_COLUMNSTORE_INDEX);
GO
EXEC dbo.PopularNamesWithAHint @Threshold = 100000
GO
/* Niko Neugebauer points out that in some cases this is a regression from behavior in 2014, if
you were using an index hint.
He points out that you can get the old functionality back if you're using hints in some cases by
lowering the compat level on the database to 120, but he also warns that will make you miss out on
many IMPROVEMENTS in batch mode execution that you get from 2016/compat mode 130.
In our case the batch mode windowing function that is part of what makes this fast is specific to 2016.
*/
/* Drop our goofy columnstore */
DROP INDEX nccx_agg_FirstNameByYearState ON agg.FirstNameByYearState;
GO
/* Create an even GOOFIER columnstore */
/* I learned about this hack from Itzik Ben-Gan */
/* Filtered columnstore indexes are 2016+ */
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_agg_FirstNameByYearState
ON agg.FirstNameByYearState
(FirstNameId)
WHERE FirstNameId = -1 and FirstNameId = -2;
GO
/* It's really empty */
select ips.row_count, ips.reserved_page_count
from sys.dm_db_partition_stats ips
join sys.indexes as si on
ips.object_id=si.object_id and
ips.index_id = si.index_id
join sys.objects as so on
ips.object_id=so.object_id
where so.name='FirstNameByYearState'
and si.name='nccx_agg_FirstNameByYearState';
GO
EXEC dbo.PopularNames @Threshold = 100000
GO
/* Yep, that worked all right. */
DROP INDEX nccx_agg_FirstNameByYearState ON agg.FirstNameByYearState
GO
/* Here's another hack: a bogus join to a table with columnstore.
Itzik Ben-Gan wrote that he learned about this from Niko Neugebauer.
I've also seen Paul White use this in a Stack Overflow answer from 2014.
This method doesn't require SQL Server 2016.
*/
CREATE TABLE dbo.hack (i int identity);
GO
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_hack on dbo.hack(i);
GO
DROP PROCEDURE IF EXISTS dbo.PopularNamesWithABogusJoin
GO
CREATE PROCEDURE dbo.PopularNamesWithABogusJoin
@Threshold INT = NULL
AS
with RunningTotal AS (
SELECT
fnby.FirstNameId,
fnby.StateCode,
fnby.Gender,
ReportYear,
SUM(NameCount) OVER (PARTITION BY fnby.FirstNameId, StateCode, Gender ORDER BY fnby.ReportYear) as TotalNamed
FROM agg.FirstNameByYearState as fnby
),
RunningTotalPlusLag AS (
SELECT
FirstNameId,
StateCode,
Gender,
ReportYear,
TotalNamed,
LAG(TotalNamed, 1, 0) OVER (PARTITION BY FirstNameId, StateCode, Gender ORDER BY ReportYear) AS TotalNamedPriorYear
FROM RunningTotal
)
SELECT
fn.FirstName,
RunningTotalPlusLag.StateCode,
RunningTotalPlusLag.Gender,
RunningTotalPlusLag.ReportYear,
RunningTotalPlusLag.TotalNamed,
RunningTotalPlusLag.TotalNamedPriorYear
FROM RunningTotalPlusLag
JOIN ref.FirstName as fn on
RunningTotalPlusLag.FirstNameId=fn.FirstNameId
/* LOL */
LEFT JOIN dbo.hack on 1=0
WHERE
(@Threshold is NULL
and TotalNamed >= 100
and (TotalNamedPriorYear < 100 OR TotalNamedPriorYear IS NULL)
)
OR
(TotalNamed >= @Threshold
and (TotalNamedPriorYear < @Threshold OR TotalNamedPriorYear IS NULL)
)
ORDER BY ReportYear DESC, StateCode
GO
EXEC dbo.PopularNamesWithABogusJoin @Threshold = 100000
GO
/* Can we use the bogus join method with a temp table?
*/
CREATE TABLE #hack (i int identity);
GO
CREATE NONCLUSTERED COLUMNSTORE INDEX nccx_hack on #hack(i);
GO
DROP PROCEDURE IF EXISTS dbo.PopularNamesWithABogusJoinToATempTable
GO
CREATE PROCEDURE dbo.PopularNamesWithABogusJoinToATempTable
@Threshold INT = NULL
AS
with RunningTotal AS (
SELECT
fnby.FirstNameId,
fnby.StateCode,
fnby.Gender,
ReportYear,
SUM(NameCount) OVER (PARTITION BY fnby.FirstNameId, StateCode, Gender ORDER BY fnby.ReportYear) as TotalNamed
FROM agg.FirstNameByYearState as fnby
),
RunningTotalPlusLag AS (
SELECT
FirstNameId,
StateCode,
Gender,
ReportYear,
TotalNamed,
LAG(TotalNamed, 1, 0) OVER (PARTITION BY FirstNameId, StateCode, Gender ORDER BY ReportYear) AS TotalNamedPriorYear
FROM RunningTotal
)
SELECT
fn.FirstName,
RunningTotalPlusLag.StateCode,
RunningTotalPlusLag.Gender,
RunningTotalPlusLag.ReportYear,
RunningTotalPlusLag.TotalNamed,
RunningTotalPlusLag.TotalNamedPriorYear
FROM RunningTotalPlusLag
JOIN ref.FirstName as fn on
RunningTotalPlusLag.FirstNameId=fn.FirstNameId
/* LOL */
LEFT JOIN #hack on 1=0
WHERE
(@Threshold is NULL
and TotalNamed >= 100
and (TotalNamedPriorYear < 100 OR TotalNamedPriorYear IS NULL)
)
OR
(TotalNamed >= @Threshold
and (TotalNamedPriorYear < @Threshold OR TotalNamedPriorYear IS NULL)
)
ORDER BY ReportYear DESC, StateCode
GO
exec dbo.PopularNamesWithABogusJoinToATempTable @Threshold = 100000;
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment