Skip to content

Instantly share code, notes, and snippets.

@pattertall
Created March 26, 2018 18:34
Show Gist options
  • Save pattertall/68fdc2424b8f5707fdf115331b8b7ead to your computer and use it in GitHub Desktop.
Save pattertall/68fdc2424b8f5707fdf115331b8b7ead to your computer and use it in GitHub Desktop.
-----------------------------------------------------------------------
-- Create test data
-----------------------------------------------------------------------
SELECT @@VERSION
--Microsoft SQL Server 2017 (RTM-CU3-GDR) (KB4052987) - 14.0.3015.40 (X64)
-- Dec 22 2017 16:13:22
-- Copyright (C) 2017 Microsoft Corporation
-- Enterprise Evaluation Edition (64-bit) on Windows Server 2016 Datacenter 10.0 <X64> (Build 14393: ) (Hypervisor)
USE tempdb /* server: GRANGER */
GO
SET STATISTICS TIME, IO ON
GO
DROP TABLE IF EXISTS test_columnstore;
GO
-- Create a test_columnstore table with 100MM rows of pseudorandom data
;WITH E1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
)
, E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b)
, E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b)
, E8(N) AS (SELECT 1 FROM E4 a CROSS JOIN E4 b)
SELECT (ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) % 5) + 1 AS col1
, ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) * RAND() AS col2
, ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) * RAND() AS col3
, ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) * RAND() AS col4
INTO dbo.test_columnstore
FROM E8
OPTION (MAXDOP 8)
-- CPU time = 117859 ms, elapsed time = 13988 ms.
GO
CREATE CLUSTERED COLUMNSTORE INDEX cs_test_columnstore ON dbo.test_columnstore WITH (MAXDOP = 8)
-- CPU time = 163296 ms, elapsed time = 29429 ms.
GO
-----------------------------------------------------------------------
-- Test query
-- This query must perform a hash aggregate on 100 million rows,
-- and then a batch mode sort on 99 million rows. It would likely
-- be more efficient to perform a single batch mode "Distinct Sort",
-- removing the need for the entire Hash Match operator.
-----------------------------------------------------------------------
-- Overall: CPU time = 90982 ms, elapsed time = 7944 ms.
-- Hash Aggregate: CPU time = 16299 ms, elapsed time = 1434 ms.
SELECT AVG(1.0*rn)
FROM (
SELECT ROW_NUMBER() OVER (ORDER BY col1, col2) AS rn
FROM (
SELECT DISTINCT col1, col2
FROM test_columnstore
) y
) x
GO
-----------------------------------------------------------------------
-- Test query: OPTION (ORDER GROUP)
-- If we remove the option of a hash aggregate to implement the DISTINCT
-- calculation, SQL Server now chooses a "Distinct Sort", but the query
-- suffers because the far less efficient row mode Sort operator is used.
-- In addition, SQL Server chains two Sort operators together (one batch
-- mode and one row mode), which should not be necessary. A single batch
-- mode "Distinct Sort" should be sufficient.
-----------------------------------------------------------------------
-- Overall: CPU time = 381325 ms, elapsed time = 45686 ms.
-- Distinct Sort: CPU time = 296047 ms, elapsed time = 36051 ms.
-- Repartition Streams: CPU time = 14560 ms, elapsed time = 11850 ms.
SELECT AVG(1.0*rn)
FROM (
SELECT ROW_NUMBER() OVER (ORDER BY col1, col2) AS rn
FROM (
SELECT DISTINCT col1, col2
FROM test_columnstore
) y
) x
OPTION (ORDER GROUP)
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment