/LenCardinalityEstimates2014.sql

## LenCardinalityEstimates2014.sql
-- Create a table with 1MM rows of dummy data
CREATE TABLE #customers (cust_nbr VARCHAR(10) NOT NULL)
GO
INSERT INTO #customers WITH (TABLOCK) (cust_nbr)
SELECT TOP 1000000 CONVERT(VARCHAR(10),
		ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
	) AS cust_nbr
FROM master..spt_values v1
CROSS JOIN master..spt_values v2
GO
-- Looking for string of a certain length.
-- While both CEs yield fairly poor estimates, the 2012 CE is much
-- more conservative (higher estimate) and therefore much more likely
-- to yield an okay plan rather than a drastically understimated loop join.
-- 2012: 31,622 rows estimated, 900K rows actual
-- 2014: 1 row estimated, 900K rows actual
SELECT COUNT(*)
FROM #customers
WHERE LEN(cust_nbr) = 6
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO

-- Basic query to make sure we have reasonable statistics
-- 2012: 1MM rows estimated, 1MM rows actual
-- 2014: 1MM rows estimated, 1MM rows actual
SELECT COUNT(*)
FROM #customers
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO

-- Using a BETWEEN clause that must, by definition, match every row.
-- Neither CE is smart enough to pick up on this.
-- 2012: 90,000 rows estimated, 1MM rows actual
-- 2014: 164,317 rows estimated, 1MM rows actual
SELECT COUNT(*)
FROM #customers
WHERE LEN(cust_nbr) BETWEEN 0 AND 10
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO

-- Using a BETWEEN clause that resolves to only "6" yields the same result as the fully covering BETWEEN clause
-- Both CEs appear to use a dummy estimate rather than using the specific values
-- 2012: 90,000 rows estimated, 900K rows actual
-- 2014: 164,317 rows estimated, 900K rows actual
SELECT COUNT(*)
FROM #customers
WHERE LEN(cust_nbr) BETWEEN 5.5 AND 6.5
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO

-- Select the distinct lengths; 2014 seems to believe that nearly every row has a distinct length
-- 2012: 1,000 rows estimated, 7 rows actual
-- 2014: 999,972 rows estimated, 7 rows actual
SELECT DISTINCT LEN(cust_nbr)
FROM #customers
OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
GO

-- UPDATE STATISTICS with FULLSCAN and then run the above tests again
-- The poor cardinality estimates remain.
UPDATE STATISTICS #customers WITH FULLSCAN
GO

-- Add computed column to allow statistics on the length of the string
-- This yields nearly perfect cardinality estimates in both 2012 and 2014
ALTER TABLE #customers
ADD cust_nbr_len AS LEN(cust_nbr)
GO

-- Cleanup
DROP TABLE #customers
GO
	-- Create a table with 1MM rows of dummy data
	CREATE TABLE #customers (cust_nbr VARCHAR(10) NOT NULL)
	GO
	INSERT INTO #customers WITH (TABLOCK) (cust_nbr)
	SELECT TOP 1000000 CONVERT(VARCHAR(10),
	ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
	) AS cust_nbr
	FROM master..spt_values v1
	CROSS JOIN master..spt_values v2
	GO
	-- Looking for string of a certain length.
	-- While both CEs yield fairly poor estimates, the 2012 CE is much
	-- more conservative (higher estimate) and therefore much more likely
	-- to yield an okay plan rather than a drastically understimated loop join.
	-- 2012: 31,622 rows estimated, 900K rows actual
	-- 2014: 1 row estimated, 900K rows actual
	SELECT COUNT(*)
	FROM #customers
	WHERE LEN(cust_nbr) = 6
	OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
	GO

	-- Basic query to make sure we have reasonable statistics
	-- 2012: 1MM rows estimated, 1MM rows actual
	-- 2014: 1MM rows estimated, 1MM rows actual
	SELECT COUNT(*)
	FROM #customers
	OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
	GO

	-- Using a BETWEEN clause that must, by definition, match every row.
	-- Neither CE is smart enough to pick up on this.
	-- 2012: 90,000 rows estimated, 1MM rows actual
	-- 2014: 164,317 rows estimated, 1MM rows actual
	SELECT COUNT(*)
	FROM #customers
	WHERE LEN(cust_nbr) BETWEEN 0 AND 10
	OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
	GO

	-- Using a BETWEEN clause that resolves to only "6" yields the same result as the fully covering BETWEEN clause
	-- Both CEs appear to use a dummy estimate rather than using the specific values
	-- 2012: 90,000 rows estimated, 900K rows actual
	-- 2014: 164,317 rows estimated, 900K rows actual
	SELECT COUNT(*)
	FROM #customers
	WHERE LEN(cust_nbr) BETWEEN 5.5 AND 6.5
	OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
	GO

	-- Select the distinct lengths; 2014 seems to believe that nearly every row has a distinct length
	-- 2012: 1,000 rows estimated, 7 rows actual
	-- 2014: 999,972 rows estimated, 7 rows actual
	SELECT DISTINCT LEN(cust_nbr)
	FROM #customers
	OPTION (QUERYTRACEON 9481) -- Optionally, use 2012 CE
	GO

	-- UPDATE STATISTICS with FULLSCAN and then run the above tests again
	-- The poor cardinality estimates remain.
	UPDATE STATISTICS #customers WITH FULLSCAN
	GO

	-- Add computed column to allow statistics on the length of the string
	-- This yields nearly perfect cardinality estimates in both 2012 and 2014
	ALTER TABLE #customers
	ADD cust_nbr_len AS LEN(cust_nbr)
	GO

	-- Cleanup
	DROP TABLE #customers
	GO