Skip to content

Instantly share code, notes, and snippets.

Created November 4, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/f1dfbe918f750f70e7eb to your computer and use it in GitHub Desktop.
Save anonymous/f1dfbe918f750f70e7eb to your computer and use it in GitHub Desktop.
-- Create a table with 1MM rows of dummy data
CREATE TABLE #customers (cust_nbr VARCHAR(10) NOT NULL)
GO
INSERT INTO #customers WITH (TABLOCK) (cust_nbr)
SELECT TOP 1000000 CONVERT(VARCHAR(10),
ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
) AS cust_nbr
FROM master..spt_values v1
CROSS JOIN master..spt_values v2
GO
-- Looking for string of a certain length.
-- While both CEs yield fairly poor estimates, the 2012 CE is much
-- more conservative (higher estimate) and therefore much more likely
-- to yield an okay plan rather than a drastically understimated loop join.
-- 2012: 31,622 rows estimated, 900K rows actual
-- 2014: 1 row estimated, 900K rows actual
SELECT COUNT(*)
FROM #customers
WHERE LEN(cust_nbr) = 6
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO
-- Basic query to make sure we have reasonable statistics
-- 2012: 1MM rows estimated, 1MM rows actual
-- 2014: 1MM rows estimated, 1MM rows actual
SELECT COUNT(*)
FROM #customers
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO
-- Using a BETWEEN clause that must, by definition, match every row.
-- Neither CE is smart enough to pick up on this.
-- 2012: 90,000 rows estimated, 1MM rows actual
-- 2014: 164,317 rows estimated, 1MM rows actual
SELECT COUNT(*)
FROM #customers
WHERE LEN(cust_nbr) BETWEEN 0 AND 10
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO
-- Using a BETWEEN clause that resolves to only "6" yields the same result as the fully covering BETWEEN clause
-- Both CEs appear to use a dummy estimate rather than using the specific values
-- 2012: 90,000 rows estimated, 900K rows actual
-- 2014: 164,317 rows estimated, 900K rows actual
SELECT COUNT(*)
FROM #customers
WHERE LEN(cust_nbr) BETWEEN 5.5 AND 6.5
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO
-- Select the distinct lengths; 2014 seems to believe that nearly every row has a distinct length
-- 2012: 1,000 rows estimated, 7 rows actual
-- 2014: 999,972 rows estimated, 7 rows actual
SELECT DISTINCT LEN(cust_nbr)
FROM #customers
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO
-- UPDATE STATISTICS with FULLSCAN and then run the above tests again
-- The poor cardinality estimates remain.
UPDATE STATISTICS #customers WITH FULLSCAN
GO
-- Add computed column to allow statistics on the length of the string
-- This yields nearly perfect cardinality estimates in both 2012 and 2014
ALTER TABLE #customers
ADD cust_nbr_len AS LEN(cust_nbr)
GO
-- Cleanup
DROP TABLE #customers
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment