Created
March 26, 2018 18:34
-
-
Save pattertall/68fdc2424b8f5707fdf115331b8b7ead to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
----------------------------------------------------------------------- | |
-- Create test data | |
----------------------------------------------------------------------- | |
SELECT @@VERSION | |
--Microsoft SQL Server 2017 (RTM-CU3-GDR) (KB4052987) - 14.0.3015.40 (X64) | |
-- Dec 22 2017 16:13:22 | |
-- Copyright (C) 2017 Microsoft Corporation | |
-- Enterprise Evaluation Edition (64-bit) on Windows Server 2016 Datacenter 10.0 <X64> (Build 14393: ) (Hypervisor) | |
USE tempdb /* server: GRANGER */ | |
GO | |
SET STATISTICS TIME, IO ON | |
GO | |
DROP TABLE IF EXISTS test_columnstore; | |
GO | |
-- Create a test_columnstore table with 100MM rows of pseudorandom data | |
;WITH E1(N) AS ( | |
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 | |
UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 | |
) | |
, E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b) | |
, E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b) | |
, E8(N) AS (SELECT 1 FROM E4 a CROSS JOIN E4 b) | |
SELECT (ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) % 5) + 1 AS col1 | |
, ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) * RAND() AS col2 | |
, ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) * RAND() AS col3 | |
, ABS(CAST(CAST(NEWID() AS VARBINARY) AS BIGINT)) * RAND() AS col4 | |
INTO dbo.test_columnstore | |
FROM E8 | |
OPTION (MAXDOP 8) | |
-- CPU time = 117859 ms, elapsed time = 13988 ms. | |
GO | |
CREATE CLUSTERED COLUMNSTORE INDEX cs_test_columnstore ON dbo.test_columnstore WITH (MAXDOP = 8) | |
-- CPU time = 163296 ms, elapsed time = 29429 ms. | |
GO | |
----------------------------------------------------------------------- | |
-- Test query | |
-- This query must perform a hash aggregate on 100 million rows, | |
-- and then a batch mode sort on 99 million rows. It would likely | |
-- be more efficient to perform a single batch mode "Distinct Sort", | |
-- removing the need for the entire Hash Match operator. | |
----------------------------------------------------------------------- | |
-- Overall: CPU time = 90982 ms, elapsed time = 7944 ms. | |
-- Hash Aggregate: CPU time = 16299 ms, elapsed time = 1434 ms. | |
SELECT AVG(1.0*rn) | |
FROM ( | |
SELECT ROW_NUMBER() OVER (ORDER BY col1, col2) AS rn | |
FROM ( | |
SELECT DISTINCT col1, col2 | |
FROM test_columnstore | |
) y | |
) x | |
GO | |
----------------------------------------------------------------------- | |
-- Test query: OPTION (ORDER GROUP) | |
-- If we remove the option of a hash aggregate to implement the DISTINCT | |
-- calculation, SQL Server now chooses a "Distinct Sort", but the query | |
-- suffers because the far less efficient row mode Sort operator is used. | |
-- In addition, SQL Server chains two Sort operators together (one batch | |
-- mode and one row mode), which should not be necessary. A single batch | |
-- mode "Distinct Sort" should be sufficient. | |
----------------------------------------------------------------------- | |
-- Overall: CPU time = 381325 ms, elapsed time = 45686 ms. | |
-- Distinct Sort: CPU time = 296047 ms, elapsed time = 36051 ms. | |
-- Repartition Streams: CPU time = 14560 ms, elapsed time = 11850 ms. | |
SELECT AVG(1.0*rn) | |
FROM ( | |
SELECT ROW_NUMBER() OVER (ORDER BY col1, col2) AS rn | |
FROM ( | |
SELECT DISTINCT col1, col2 | |
FROM test_columnstore | |
) y | |
) x | |
OPTION (ORDER GROUP) | |
GO |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment