Skip to content

Instantly share code, notes, and snippets.

@swasheck
Created May 16, 2014 19:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save swasheck/eb7eb5f5f135280824ef to your computer and use it in GitHub Desktop.
Save swasheck/eb7eb5f5f135280824ef to your computer and use it in GitHub Desktop.
stats_analysis.sql
DECLARE @sql varchar(max);
IF EXISTS (SELECT 1 FROM tempdb.sys.tables WHERE NAME='HISTOGRAM')
BEGIN
DROP TABLE tempdb.dbo.HISTOGRAM;
END
IF (SELECT CURSOR_STATUS('global','statcur')) >= -1
BEGIN
DEALLOCATE statcur
END
IF (SELECT CURSOR_STATUS('global','histo_cur')) >= -1
BEGIN
DEALLOCATE histo_cur
END
IF EXISTS (SELECT 1 FROM tempdb.sys.tables WHERE name = 'skewed_columns')
begin
drop table tempdb.dbo.skewed_columns;
end
CREATE TABLE tempdb.dbo.skewed_columns (
schema_name sysname,
table_name sysname,
column_name sysname,
stat_name sysname null,
skewfactor real,
analysis_range nvarchar(max),
stat_sql nvarchar(max)
);
-- CURSOR VARIABLES
DECLARE
@SchemaName sysname,
@TableName sysname,
@StatColumnName sysname,
@StatName sysname,
@StatColType sysname,
@StatColPrecision int,
@StatColMaxLen int,
@StatColScale int,
@StatColCollation sysname;
declare statcur cursor fast_forward read_only for
select
ss.name as schema_name,
t.name as table_name,
c.name stat_column_name,
s.name stat_name,
ty.name stat_column_type,
c.precision stat_column_precision,
c.max_length stat_column_max_length,
c.scale stat_column_scale,
c.collation_name
from sys.columns c
join sys.tables t
on c.object_id = t.object_id
join sys.stats s
on t.object_id = s.object_id
and c.name = INDEX_COL(OBJECT_NAME(s.object_id),stats_id,1)
join sys.types ty
on c.user_type_id = ty.user_type_id
join sys.schemas ss
on t.schema_id = ss.schema_id
where ss.name <> 'sys'
and ty.name not in ('xml','ntext','text') -- not a comparable data type
open statcur
fetch next from statcur into
@SchemaName,
@TableName,
@StatColumnName,
@StatName,
@StatColType,
@StatColPrecision,
@StatColMaxLen,
@StatColScale,
@StatColCollation;
IF EXISTS (SELECT 1 FROM tempdb.sys.tables where name = 'DENSITY_VECTOR')
BEGIN
DROP TABLE tempdb.dbo.DENSITY_VECTOR;
END
CREATE TABLE tempdb.dbo.DENSITY_VECTOR (
AllDensity REAL,
AverageLength REAL,
[Columns] NVARCHAR(4000)
)
while @@fetch_status = 0
begin
print 'Stat Table Name: ' + @TableName;
print 'Stat Object Name: ' + @StatName;
print 'Stat Column Name: ' + @StatColumnName;
print 'Stat Column Type: ' + @StatColType;
print 'Stat Column Precision: ' + CAST(@StatColPrecision as nvarchar(255))
print 'Stat Column Scale: ' + CAST(@StatColScale as nvarchar(255))
print 'Stat Column MaxLen: ' + CAST(@StatColMaxLen as nvarchar(255))
set @sql = 'DBCC SHOW_STATISTICS(''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatColumnName) + ') WITH DENSITY_VECTOR, NO_INFOMSGS;';
begin try
print 'POPULATING DENSITY VECTOR (''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatColumnName) + ')'
INSERT INTO tempdb.dbo.DENSITY_VECTOR
execute(@sql);
end try
begin catch
print @sql;
print error_message();
end catch
set @sql = '
IF EXISTS (SELECT 1 FROM tempdb.sys.tables WHERE NAME = ''HISTOGRAM'')
BEGIN
DROP TABLE tempdb.dbo.HISTOGRAM;
END
CREATE TABLE tempdb.dbo.HISTOGRAM (
RANGE_HI_KEY ' +
case when @StatColType in ('float','tinyint', 'smallint', 'int', 'bigint','datetime','sysname')
then @StatColType
when @StatColType IN ('nchar','nvarchar') and (@StatColMaxLen = 8000 OR @StatColMaxLen = -1)
then @StatColType + '(max)'
WHEN @StatColType IN ('char', 'varchar')
THEN
CASE
WHEN (@StatColMaxLen = 8000 OR @StatColMaxLen = -1)
then @StatColType + '(max)'
ELSE
@StatColType + '(' + CONVERT(VARCHAR(4),@StatColMaxLen) + ') COLLATE ' + @StatColCollation
END
WHEN @StatColType IN ('datetime2', 'datetimeoffset', 'time')
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColScale) + ')'
WHEN @StatColType IN ('numeric', 'decimal')
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColPrecision) + ',' + CONVERT(VARCHAR(5), @StatColScale) + ')'
end + ',
RANGE_ROWS REAL,
EQ_ROWS BIGINT,
DISTINCT_RANGE_ROWS BIGINT,
AVG_RANGE_ROWS REAL
);';
begin try
print 'CREATING HISTOGRAM'
execute(@sql);
end try
begin catch
print error_message();
end catch
SET @sql = 'DBCC SHOW_STATISTICS(''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatName) + ') WITH HISTOGRAM, NO_INFOMSGS';
begin try
print 'POPULATING HISTOGRAM' + ' DBCC SHOW_STATISTICS(''' + QUOTENAME(@SchemaName) + '.' + QUOTENAME(@TableName) +''',' + QUOTENAME(@StatName) + ') WITH HISTOGRAM';
INSERT INTO tempdb.dbo.HISTOGRAM
execute(@sql);
end try
begin catch
print 'populate histo error type: ' + cast(@@error as nvarchar(7));
print error_message();
use tempdb
select @TableName,@SchemaName,@StatColumnName,@StatName;
exec sp_help HISTOGRAM
use idw;
end catch
begin try
if exists (
SELECT 1
FROM tempdb.sys.columns c
JOIN tempdb.sys.tables t
on c.object_id = t.object_id
WHERE t.name = 'HISTOGRAM' and c.name = 'skew_factor'
)
begin
alter table tempdb.dbo.HISTOGRAM
drop column skew_factor
end
alter table tempdb.dbo.HISTOGRAM
add skew_factor real null;
end try
begin catch
print 'error adding skew_factor'
print error_message();
end catch
set @sql = '
declare @avg_range_rows real, @skewfactor real, @isql nvarchar(max),@parmdef nvarchar(500);
set @parmdef = N''@skewfactor real output'';
declare @thiskey '+
case when @StatColType in ('tinyint', 'smallint', 'int', 'bigint','datetime','sysname')
then @StatColType
when @StatColType IN ('nchar','nvarchar') and @StatColMaxLen = 8000
then @StatColType + '(max) '
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar')
THEN @StatColType + '(' + CONVERT(VARCHAR(4),@StatColMaxLen) + ') '
WHEN @StatColType IN ('datetime2', 'datetimeoffset', 'time')
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColScale) + ')'
WHEN @StatColType IN ('numeric', 'decimal')
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColPrecision) + ',' + CONVERT(VARCHAR(5), @StatColScale) + ')'
end
+',@lastkey '+
case when @StatColType in ('date','tinyint', 'smallint', 'int', 'bigint','datetime','sysname')
then @StatColType
when @StatColType IN ('nchar','nvarchar') and @StatColMaxLen = 8000
then @StatColType + '(max) '
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar')
THEN @StatColType + '(' + CONVERT(VARCHAR(4),@StatColMaxLen) + ') '
WHEN @StatColType IN ('datetime2', 'datetimeoffset', 'time')
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColScale) + ')'
WHEN @StatColType IN ('numeric', 'decimal')
THEN @StatColType + '(' + CONVERT(VARCHAR(5),@StatColPrecision) + ',' + CONVERT(VARCHAR(5), @StatColScale) + ')'
end + '
declare histo_cur cursor fast_forward read_only for
select RANGE_HI_KEY, AVG_RANGE_ROWS
from tempdb.dbo.HISTOGRAM
order by RANGE_HI_KEY ' +
CASE
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar')
THEN 'COLLATE ' + @StatColCollation
ELSE ''
END + '
open histo_cur;
fetch next from histo_cur
into @thiskey,@avg_range_rows;
while @@fetch_status = 0
begin
set @isql = ''
select
@skewfactor =
case when abs(1.*''+cast(@avg_range_rows as nvarchar(25))+''-MinimumCount) > abs(1.*''+cast(@avg_range_rows as nvarchar(25))+''-MaximumCount)
then abs(''+cast(@avg_range_rows as nvarchar(25))+''-MinimumCount) / ''+cast(@avg_range_rows as nvarchar(25))+''
else
abs(1.*''+cast(@avg_range_rows as nvarchar(25))+''-MaximumCount) / ''+cast(@avg_range_rows as nvarchar(25))+''
end
from (
select
COUNT(DISTINCT '+@StatColumnName+') ACTUAL_DISTINCT_RANGE_ROWS,
SUM(group_count) ACTUAL_RANGE_ROWS,
MAX(group_count) MaximumCount,
MIN(group_count) MinimumCount,
CASE WHEN count(distinct '+@StatColumnName+') = 0 THEN 1
ELSE
1. * (COUNT('+@StatColumnName+') / count(distinct '+@StatColumnName+'))
END
ACTUAL_AVG_RANGE_ROWS
from (
select
'+@StatColumnName+',
count('+@StatColumnName+') group_count
from '+@TableName+'
where
'+@StatColumnName+' > ''+
case when '''+@StatColType+''' in (''int'',''decimal'',''numeric'',''bigint'',''real'')
then
cast(@lastkey as nvarchar(255))
else
'''''''' + CAST(@lastkey as NVARCHAR(MAX)) + ''''''''
end
+''
and '+@StatColumnName+' < ''+
case when '''+@StatColType+''' in (''int'',''decimal'',''numeric'',''bigint'',''real'')
then
cast(@thiskey as nvarchar(255))
else
'''''''' + CAST(@thiskey as NVARCHAR(MAX)) + ''''''''
end
+''
group by '+@StatColumnName+'
) base
) agg '';
execute sp_executesql @isql,@parmdef,@skewfactor=@skewfactor output;
update tempdb.dbo.HISTOGRAM set skew_factor = @skewfactor where RANGE_HI_KEY=@thiskey;
set @lastkey = @thiskey;
fetch next from histo_cur
into @thiskey,@avg_range_rows;
end
close histo_cur;
deallocate histo_cur;';
begin try
PRINT 'RUNNING ANALYSIS'
execute (@sql);
end try
begin catch
print 'error running analysis'
print 'sql length: ' + cast(len(@sql) as nvarchar(255));
print ERROR_MESSAGE();
select @TableName,@StatColumnName,@StatName,RANGE_HI_KEY, AVG_RANGE_ROWS
from tempdb.dbo.HISTOGRAM
print @sql;
end catch;
with rnk as (
select
row_number() over (order by range_hi_key) as rn,
RANGE_HI_KEY, skew_factor
from tempdb.dbo.HiSTOGRAM
)
INSERT tempdb.dbo.skewed_columns
SELECT
@SchemaName,@TableName,@StatColumnName,@StatName,h.skew_factor,'',
'
IF EXISTS (SELECT 1 FROM sys.stats WHERE QUOTENAME(name) = ''[stf_'+@TableName+'_'+@StatColumnName+'_'+cast(h2.RANGE_HI_KEY as nvarchar(50))+'_'+cast(h.RANGE_HI_KEY as nvarchar(50))+']'')
BEGIN
DROP STATISTICS '+@TableName+'.[stf_'+@TableName+'_'+@StatColumnName+'_'+cast(h2.RANGE_HI_KEY as nvarchar(50))+'_'+cast(h.RANGE_HI_KEY as nvarchar(50))+']
END
create statistics [stf_'+@TableName+'_'+@StatColumnName+'_'+cast(h2.RANGE_HI_KEY as nvarchar(50))+'_'+cast(h.RANGE_HI_KEY as nvarchar(50))+']
on '+@TableName+'('+@StatColumnName+') ' +
CASE
WHEN @StatColType IN ('money','tinyint', 'smallint', 'int', 'bigint','decimal','numeric','float')
THEN ' where '+@StatColumnName+' > '+cast(h2.RANGE_HI_KEY as nvarchar(255))+' and '+@StatColumnName+' <'+cast(h.RANGE_HI_KEY as nvarchar(255))
WHEN @StatColType IN ('char', 'varchar', 'nchar', 'nvarchar')
THEN ' where '+@StatColumnName+' > '''+cast(h2.RANGE_HI_KEY as nvarchar(255))+''' and '+@StatColumnName+' < '''+cast(h.RANGE_HI_KEY as nvarchar(255)) + ''''
WHEN @StatColType IN ('datetime', 'datetime2')
THEN ' where '+@StatColumnName+' > '''+CAST(CONVERT(NVARCHAR(255),h2.RANGE_HI_KEY,120) AS NVARCHAR(255))+''' and '+@StatColumnName+' < '''+CAST(CONVERT(NVARCHAR(255),h2.RANGE_HI_KEY,120) AS NVARCHAR(255)) + ''''
END
+ '
with fullscan
GO'
from rnk h
join rnk h2
on h.rn = (h2.rn+1)
where h.skew_factor >= 2.5
TRUNCATE TABLE tempdb.dbo.DENSITY_VECTOR;
fetch next from statcur into
@SchemaName,
@TableName,
@StatColumnName,
@StatName,
@StatColType,
@StatColPrecision,
@StatColMaxLen,
@StatColScale,
@StatColCollation;
end
close statcur
deallocate statcur
--dbcc show_statistics(backup_timeline,_WA_Sys_00000002_117F9D94)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment