Created
November 4, 2015 14:18
-
-
Save anonymous/f1dfbe918f750f70e7eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Create a table with 1MM rows of dummy data | |
CREATE TABLE #customers (cust_nbr VARCHAR(10) NOT NULL) | |
GO | |
INSERT INTO #customers WITH (TABLOCK) (cust_nbr) | |
SELECT TOP 1000000 CONVERT(VARCHAR(10), | |
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) | |
) AS cust_nbr | |
FROM master..spt_values v1 | |
CROSS JOIN master..spt_values v2 | |
GO | |
-- Looking for string of a certain length. | |
-- While both CEs yield fairly poor estimates, the 2012 CE is much | |
-- more conservative (higher estimate) and therefore much more likely | |
-- to yield an okay plan rather than a drastically understimated loop join. | |
-- 2012: 31,622 rows estimated, 900K rows actual | |
-- 2014: 1 row estimated, 900K rows actual | |
SELECT COUNT(*) | |
FROM #customers | |
WHERE LEN(cust_nbr) = 6 | |
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE | |
GO | |
-- Basic query to make sure we have reasonable statistics | |
-- 2012: 1MM rows estimated, 1MM rows actual | |
-- 2014: 1MM rows estimated, 1MM rows actual | |
SELECT COUNT(*) | |
FROM #customers | |
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE | |
GO | |
-- Using a BETWEEN clause that must, by definition, match every row. | |
-- Neither CE is smart enough to pick up on this. | |
-- 2012: 90,000 rows estimated, 1MM rows actual | |
-- 2014: 164,317 rows estimated, 1MM rows actual | |
SELECT COUNT(*) | |
FROM #customers | |
WHERE LEN(cust_nbr) BETWEEN 0 AND 10 | |
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE | |
GO | |
-- Using a BETWEEN clause that resolves to only "6" yields the same result as the fully covering BETWEEN clause | |
-- Both CEs appear to use a dummy estimate rather than using the specific values | |
-- 2012: 90,000 rows estimated, 900K rows actual | |
-- 2014: 164,317 rows estimated, 900K rows actual | |
SELECT COUNT(*) | |
FROM #customers | |
WHERE LEN(cust_nbr) BETWEEN 5.5 AND 6.5 | |
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE | |
GO | |
-- Select the distinct lengths; 2014 seems to believe that nearly every row has a distinct length | |
-- 2012: 1,000 rows estimated, 7 rows actual | |
-- 2014: 999,972 rows estimated, 7 rows actual | |
SELECT DISTINCT LEN(cust_nbr) | |
FROM #customers | |
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE | |
GO | |
-- UPDATE STATISTICS with FULLSCAN and then run the above tests again | |
-- The poor cardinality estimates remain. | |
UPDATE STATISTICS #customers WITH FULLSCAN | |
GO | |
-- Add computed column to allow statistics on the length of the string | |
-- This yields nearly perfect cardinality estimates in both 2012 and 2014 | |
ALTER TABLE #customers | |
ADD cust_nbr_len AS LEN(cust_nbr) | |
GO | |
-- Cleanup | |
DROP TABLE #customers | |
GO |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment