Created
May 16, 2014 19:33
-
-
Save swasheck/eb7eb5f5f135280824ef to your computer and use it in GitHub Desktop.
stats_analysis.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DECLARE @sql varchar(max); | |
IF EXISTS (SELECT 1 FROM tempdb.sys.tables WHERE NAME='HISTOGRAM') | |
BEGIN | |
DROP TABLE tempdb.dbo.HISTOGRAM; | |
END | |
IF (SELECT CURSOR_STATUS('global','statcur')) >= -1 | |
BEGIN | |
DEALLOCATE statcur | |
END | |
IF (SELECT CURSOR_STATUS('global','histo_cur')) >= -1 | |
BEGIN | |
DEALLOCATE histo_cur | |
END | |
IF EXISTS (SELECT 1 FROM tempdb.sys.tables WHERE name = 'skewed_columns') | |
begin | |
drop table tempdb.dbo.skewed_columns; | |
end | |
CREATE TABLE tempdb.dbo.skewed_columns ( | |
schema_name sysname, | |
table_name sysname, | |
column_name sysname, | |
stat_name sysname null, | |
skewfactor real, | |
analysis_range nvarchar(max), | |
stat_sql nvarchar(max) | |
); | |
-- CURSOR VARIABLES | |
DECLARE | |
@SchemaName sysname, | |
@TableName sysname, | |
@StatColumnName sysname, | |
@StatName sysname, | |
@StatColType sysname, | |
@StatColPrecision int, | |
@StatColMaxLen int, | |
@StatColScale int, | |
@StatColCollation sysname; | |
declare statcur cursor fast_forward read_only for | |
select | |
ss.name as schema_name, | |
t.name as table_name, | |
c.name stat_column_name, | |
s.name stat_name, | |
ty.name stat_column_type, | |
c.precision stat_column_precision, | |
c.max_length stat_column_max_length, | |
c.scale stat_column_scale, | |
c.collation_name | |
from sys.columns c | |
join sys.tables t | |
on c.object_id = t.object_id | |
join sys.stats s | |
on t.object_id = s.object_id | |
and c.name = INDEX_COL(OBJECT_NAME(s.object_id),stats_id,1) | |
join sys.types ty | |
on c.user_type_id = ty.user_type_id | |
join sys.schemas ss | |
on t.schema_id = ss.schema_id | |
where ss.name <> 'sys' | |
and ty.name not in ('xml','ntext','text') -- not a comparable data type | |
open statcur | |
fetch next from statcur into | |
@SchemaName, | |
@TableName, | |
@StatColumnName, | |
@StatName, | |
@StatColType, | |
@StatColPrecision, | |
@StatColMaxLen, | |
@StatColScale, | |
@StatColCollation; | |
IF EXISTS (SELECT 1 FROM tempdb.sys.tables where name = 'DENSITY_VECTOR') | |
BEGIN | |
DROP TABLE tempdb.dbo.DENSITY_VECTOR; | |
END | |
CREATE TABLE tempdb.dbo.DENSITY_VECTOR ( | |
AllDensity REAL, | |
AverageLength REAL, | |
[Columns] NVARCHAR(4000) | |
) | |
while @@fetch_status = 0 | |
begin | |
print 'Stat Table Name: ' + @TableName; | |
print 'Stat Object Name: ' + @StatName; | |
print 'Stat Column Name: ' + @StatColumnName; | |
print 'Stat Column Type: ' + @StatColType; | |
print 'Stat Column Precision: ' + CAST(@StatColPrecision as nvarchar(255)) | |
print 'Stat Column Scale: ' + CAST(@StatColScale as nvarchar(255)) | |
print 'Stat Column MaxLen: ' + CAST(@StatColMaxLen as nvarchar(255)) | |
set @sql = 'DBCC SHOW_STATISTICS(''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatColumnName) + ') WITH DENSITY_VECTOR, NO_INFOMSGS;'; | |
begin try | |
print 'POPULATING DENSITY VECTOR (''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatColumnName) + ')' | |
INSERT INTO tempdb.dbo.DENSITY_VECTOR | |
execute(@sql); | |
end try | |
begin catch | |
print @sql; | |
print error_message(); | |
end catch | |
set @sql = ' | |
IF EXISTS (SELECT 1 FROM tempdb.sys.tables WHERE NAME = ''HISTOGRAM'') | |
BEGIN | |
DROP TABLE tempdb.dbo.HISTOGRAM; | |
END | |
CREATE TABLE tempdb.dbo.HISTOGRAM ( | |
RANGE_HI_KEY ' + | |
case when @StatColType in ('float','tinyint', 'smallint', 'int', 'bigint','datetime','sysname') | |
then @StatColType | |
when @StatColType IN ('nchar','nvarchar') and (@StatColMaxLen = 8000 OR @StatColMaxLen = -1) | |
then @StatColType + '(max)' | |
WHEN @StatColType IN ('char', 'varchar') | |
THEN | |
CASE | |
WHEN (@StatColMaxLen = 8000 OR @StatColMaxLen = -1) | |
then @StatColType + '(max)' | |
ELSE | |
@StatColType + '(' + CONVERT(VARCHAR(4),@StatColMaxLen) + ') COLLATE ' + @StatColCollation | |
END | |
WHEN @StatColType IN ('datetime2', 'datetimeoffset', 'time') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColScale) + ')' | |
WHEN @StatColType IN ('numeric', 'decimal') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColPrecision) + ',' + CONVERT(VARCHAR(5), @StatColScale) + ')' | |
end + ', | |
RANGE_ROWS REAL, | |
EQ_ROWS BIGINT, | |
DISTINCT_RANGE_ROWS BIGINT, | |
AVG_RANGE_ROWS REAL | |
);'; | |
begin try | |
print 'CREATING HISTOGRAM' | |
execute(@sql); | |
end try | |
begin catch | |
print error_message(); | |
end catch | |
SET @sql = 'DBCC SHOW_STATISTICS(''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatName) + ') WITH HISTOGRAM, NO_INFOMSGS'; | |
begin try | |
print 'POPULATING HISTOGRAM' + ' DBCC SHOW_STATISTICS(''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatName) + ') WITH HISTOGRAM'; | |
INSERT INTO tempdb.dbo.HISTOGRAM | |
execute(@sql); | |
end try | |
begin catch | |
print 'populate histo error type: ' + cast(@@error as nvarchar(7)); | |
print error_message(); | |
use tempdb | |
select @TableName,@SchemaName,@StatColumnName,@StatName; | |
exec sp_help HISTOGRAM | |
use idw; | |
end catch | |
begin try | |
if exists ( | |
SELECT 1 | |
FROM tempdb.sys.columns c | |
JOIN tempdb.sys.tables t | |
on c.object_id = t.object_id | |
WHERE t.name = 'HISTOGRAM' and c.name = 'skew_factor' | |
) | |
begin | |
alter table tempdb.dbo.HISTOGRAM | |
drop column skew_factor | |
end | |
alter table tempdb.dbo.HISTOGRAM | |
add skew_factor real null; | |
end try | |
begin catch | |
print 'error adding skew_factor' | |
print error_message(); | |
end catch | |
set @sql = ' | |
declare @avg_range_rows real, @skewfactor real, @isql nvarchar(max),@parmdef nvarchar(500); | |
set @parmdef = N''@skewfactor real output''; | |
declare @thiskey '+ | |
case when @StatColType in ('tinyint', 'smallint', 'int', 'bigint','datetime','sysname') | |
then @StatColType | |
when @StatColType IN ('nchar','nvarchar') and @StatColMaxLen = 8000 | |
then @StatColType + '(max) ' | |
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(4),@StatColMaxLen) + ') ' | |
WHEN @StatColType IN ('datetime2', 'datetimeoffset', 'time') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColScale) + ')' | |
WHEN @StatColType IN ('numeric', 'decimal') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColPrecision) + ',' + CONVERT(VARCHAR(5), @StatColScale) + ')' | |
end | |
+',@lastkey '+ | |
case when @StatColType in ('date','tinyint', 'smallint', 'int', 'bigint','datetime','sysname') | |
then @StatColType | |
when @StatColType IN ('nchar','nvarchar') and @StatColMaxLen = 8000 | |
then @StatColType + '(max) ' | |
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(4),@StatColMaxLen) + ') ' | |
WHEN @StatColType IN ('datetime2', 'datetimeoffset', 'time') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColScale) + ')' | |
WHEN @StatColType IN ('numeric', 'decimal') | |
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColPrecision) + ',' + CONVERT(VARCHAR(5), @StatColScale) + ')' | |
end + ' | |
declare histo_cur cursor fast_forward read_only for | |
select RANGE_HI_KEY, AVG_RANGE_ROWS | |
from tempdb.dbo.HISTOGRAM | |
order by RANGE_HI_KEY ' + | |
CASE | |
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar') | |
THEN 'COLLATE ' + @StatColCollation | |
ELSE '' | |
END + ' | |
open histo_cur; | |
fetch next from histo_cur | |
into @thiskey,@avg_range_rows; | |
while @@fetch_status = 0 | |
begin | |
set @isql = '' | |
select | |
@skewfactor = | |
case when abs(1.*''+cast(@avg_range_rows as nvarchar(25))+''-MinimumCount) > abs(1.*''+cast(@avg_range_rows as nvarchar(25))+''-MaximumCount) | |
then abs(''+cast(@avg_range_rows as nvarchar(25))+''-MinimumCount) / ''+cast(@avg_range_rows as nvarchar(25))+'' | |
else | |
abs(1.*''+cast(@avg_range_rows as nvarchar(25))+''-MaximumCount) / ''+cast(@avg_range_rows as nvarchar(25))+'' | |
end | |
from ( | |
select | |
COUNT(DISTINCT '+@StatColumnName+') ACTUAL_DISTINCT_RANGE_ROWS, | |
SUM(group_count) ACTUAL_RANGE_ROWS, | |
MAX(group_count) MaximumCount, | |
MIN(group_count) MinimumCount, | |
CASE WHEN count(distinct '+@StatColumnName+') = 0 THEN 1 | |
ELSE | |
1. * (COUNT('+@StatColumnName+') / count(distinct '+@StatColumnName+')) | |
END | |
ACTUAL_AVG_RANGE_ROWS | |
from ( | |
select | |
'+@StatColumnName+', | |
count('+@StatColumnName+') group_count | |
from '+@TableName+' | |
where | |
'+@StatColumnName+' > ''+ | |
case when '''+@StatColType+''' in (''int'',''decimal'',''numeric'',''bigint'',''real'') | |
then | |
cast(@lastkey as nvarchar(255)) | |
else | |
'''''''' + CAST(@lastkey as NVARCHAR(MAX)) + '''''''' | |
end | |
+'' | |
and '+@StatColumnName+' < ''+ | |
case when '''+@StatColType+''' in (''int'',''decimal'',''numeric'',''bigint'',''real'') | |
then | |
cast(@thiskey as nvarchar(255)) | |
else | |
'''''''' + CAST(@thiskey as NVARCHAR(MAX)) + '''''''' | |
end | |
+'' | |
group by '+@StatColumnName+' | |
) base | |
) agg ''; | |
execute sp_executesql @isql,@parmdef,@skewfactor=@skewfactor output; | |
update tempdb.dbo.HISTOGRAM set skew_factor = @skewfactor where RANGE_HI_KEY=@thiskey; | |
set @lastkey = @thiskey; | |
fetch next from histo_cur | |
into @thiskey,@avg_range_rows; | |
end | |
close histo_cur; | |
deallocate histo_cur;'; | |
begin try | |
PRINT 'RUNNING ANALYSIS' | |
execute (@sql); | |
end try | |
begin catch | |
print 'error running analysis' | |
print 'sql length: ' + cast(len(@sql) as nvarchar(255)); | |
print ERROR_MESSAGE(); | |
select @TableName,@StatColumnName,@StatName,RANGE_HI_KEY, AVG_RANGE_ROWS | |
from tempdb.dbo.HISTOGRAM | |
print @sql; | |
end catch; | |
with rnk as ( | |
select | |
row_number() over (order by range_hi_key) as rn, | |
RANGE_HI_KEY, skew_factor | |
from tempdb.dbo.HiSTOGRAM | |
) | |
INSERT tempdb.dbo.skewed_columns | |
SELECT | |
@SchemaName,@TableName,@StatColumnName,@StatName,h.skew_factor,'', | |
' | |
IF EXISTS (SELECT 1 FROM sys.stats WHERE QUOTENAME(name) = ''[stf_'+@TableName+'_'+@StatColumnName+'_'+cast(h2.RANGE_HI_KEY as nvarchar(50))+'_'+cast(h.RANGE_HI_KEY as nvarchar(50))+']'') | |
BEGIN | |
DROP STATISTICS '+@TableName+'.[stf_'+@TableName+'_'+@StatColumnName+'_'+cast(h2.RANGE_HI_KEY as nvarchar(50))+'_'+cast(h.RANGE_HI_KEY as nvarchar(50))+'] | |
END | |
create statistics [stf_'+@TableName+'_'+@StatColumnName+'_'+cast(h2.RANGE_HI_KEY as nvarchar(50))+'_'+cast(h.RANGE_HI_KEY as nvarchar(50))+'] | |
on '+@TableName+'('+@StatColumnName+') ' + | |
CASE | |
WHEN @StatColType IN ('money','tinyint', 'smallint', 'int', 'bigint','decimal','numeric','float') | |
THEN ' where '+@StatColumnName+' > '+cast(h2.RANGE_HI_KEY as nvarchar(255))+' and '+@StatColumnName+' <'+cast(h.RANGE_HI_KEY as nvarchar(255)) | |
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar') | |
THEN ' where '+@StatColumnName+' > '''+cast(h2.RANGE_HI_KEY as nvarchar(255))+''' and '+@StatColumnName+' < '''+cast(h.RANGE_HI_KEY as nvarchar(255)) + '''' | |
WHEN @StatColType IN ('datetime', 'datetime2') | |
THEN ' where '+@StatColumnName+' > '''+CAST(CONVERT(NVARCHAR(255),h2.RANGE_HI_KEY,120) AS NVARCHAR(255))+''' and '+@StatColumnName+' < '''+CAST(CONVERT(NVARCHAR(255),h2.RANGE_HI_KEY,120) AS NVARCHAR(255)) + '''' | |
END | |
+ ' | |
with fullscan | |
GO' | |
from rnk h | |
join rnk h2 | |
on h.rn = (h2.rn+1) | |
where h.skew_factor >= 2.5 | |
TRUNCATE TABLE tempdb.dbo.DENSITY_VECTOR; | |
fetch next from statcur into | |
@SchemaName, | |
@TableName, | |
@StatColumnName, | |
@StatName, | |
@StatColType, | |
@StatColPrecision, | |
@StatColMaxLen, | |
@StatColScale, | |
@StatColCollation; | |
end | |
close statcur | |
deallocate statcur | |
--dbcc show_statistics(backup_timeline,_WA_Sys_00000002_117F9D94) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment