Skip to content

Instantly share code, notes, and snippets.

@matthew-n
Last active May 18, 2021 16:45
Show Gist options
  • Save matthew-n/663c8e34247d416c82f247937a47aab6 to your computer and use it in GitHub Desktop.
Save matthew-n/663c8e34247d416c82f247937a47aab6 to your computer and use it in GitHub Desktop.
metadata tools
CREATE TABLE ##sample_set (
sample_set_id INT NOT NULL IDENTITY(1,1),
[database_id] INT NOT NULL,
[database_name] NVARCHAR(128) NOT NULL,
[table_id] INT NOT NULL,
[table_name] NVARCHAR(128) NOT NULL,
[column_id] INT NOT NULL,
[column_name] NVARCHAR(128) NOT NULL,
[stats_id] INT NULL,
[status_name] NVARCHAR(128) NULL,
[last_update] DATETIME2(7) NULL
PRIMARY KEY (sample_set_id),
UNIQUE (database_id, table_id, column_id)
);
CREATE TABLE ##samples (
sample_set_id INT NOT NULL,
sample_num INT NOT NULL,
sample_value NVARCHAR(4000),
PRIMARY KEY(sample_set_id, sample_num)
);
WITH filtered_columns (object_id,column_id,name) AS
(
SELECT
object_id,
column_id,
name
FROM sys.columns AS c
WHERE
c.is_identity = 0 AND
NOT EXISTS (
-- the column has a fk to an identity column
SELECT 1
FROM sys.foreign_key_columns AS fkc
JOIN sys.columns AS ref_c ON fkc.referenced_object_id = ref_c.object_id AND fkc.referenced_column_id = ref_c.column_id
WHERE
fkc.parent_object_id = c.object_id AND
fkc.parent_column_id = c.column_id AND
ref_c.is_identity=1
)
)
INSERT INTO ##sample_set
SELECT
DB_ID() AS [database_id],
DB_NAME() AS [database_name],
tmp.table_id,
tmp.table_name,
tmp.column_id,
tmp.column_name,
tmp.stats_id,
tmp.status_name,
tmp.last_updated
FROM (
SELECT
tbl.object_id AS [table_id],
tbl.name AS [table_name],
c.column_id,
c.name AS [column_name],
s.stats_id,
s.name AS [status_name],
sp.last_updated,
--priorites the stats a column occurs in pk > index > auto-generated
ROW_NUMBER()OVER(PARTITION BY c.object_id, c.column_id ORDER BY s.stats_id) AS rn
FROM sys.tables AS tbl
JOIN filtered_columns AS c ON c.object_id=tbl.object_id
LEFT JOIN (
sys.stats AS s
JOIN sys.stats_columns AS sc ON sc.object_id = s.object_id AND sc.stats_id = s.stats_id
) ON
s.object_id = tbl.object_id AND sc.column_id = c.column_id
-- we are instested in the first column of read and complete stats
AND sc.stats_column_id = 1 AND s.is_temporary = 0 AND s.has_filter = 0
LEFT JOIN sys.indexes AS i ON i.object_id = tbl.object_id
-- stats created by index have this quality
AND s.stats_id = i.index_id
OUTER APPLY sys.dm_db_stats_properties(s.object_id, s.stats_id) sp
) AS tmp
WHERE
tmp.rn = 1
/* declare variables */
DECLARE
@sample_set_id INT,
@table_id INT,
@stats_id INT;
DECLARE sample_set CURSOR FAST_FORWARD READ_ONLY FOR
SELECT sample_set_id, table_id, stats_id
FROM ##sample_set
WHERE
stats_id IS NOT NULL AND
sample_set_id > COALESCE((SELECT MAX(sample_set_id) FROM ##samples),0)
OPEN sample_set
FETCH NEXT FROM sample_set INTO @sample_set_id, @table_id, @stats_id
WHILE @@FETCH_STATUS = 0
BEGIN
INSERT INTO ##samples
SELECT
@sample_set_id,
b.step_number AS sample_number,
CAST(b.range_high_key AS NVARCHAR(4000)) AS sample_value
FROM sys.dm_db_stats_histogram(@table_id,@stats_id)AS b
FETCH NEXT FROM sample_set INTO @sample_set_id, @table_id, @stats_id
END
CLOSE sample_set
DEALLOCATE sample_set
/* declare variables */
DECLARE @stmt NVARCHAR(4000);
DECLARE take_samples CURSOR FAST_FORWARD READ_ONLY FOR
SELECT
CONCAT('select ',sample_set_id,', row_number() over(order by ',a.column_name,') , cast(',a.column_name,' as nvarchar(4000)) as sample_value from ',table_name,' tablesample (200 rows)')
FROM ##sample_set AS a
WHERE
a.database_id = DB_ID() AND
stats_id IS NULL AND
NOT EXISTS (SELECT 1 FROM ##samples AS b WHERE b.sample_set_id = a.sample_set_id);
OPEN take_samples
FETCH NEXT FROM take_samples INTO @stmt
WHILE @@FETCH_STATUS = 0
BEGIN
insert INTO ##samples
EXEC sys.sp_executesql @stmt;
FETCH NEXT FROM take_samples INTO @stmt
END
CLOSE take_samples
DEALLOCATE take_samples
SET STATISTICS IO, TIME on
GO
CREATE VIEW metadata.Catalogs
AS
SELECT
o.object_id AS major_id,
COALESCE(SubObj.sub_id,0) AS minor_id,
QUOTENAME(OBJECT_SCHEMA_NAME(o.object_id))
+ISNULL('.' + QUOTENAME(o.name),'')
+ISNULL('.'+ QUOTENAME(SubObj.level2name),'')
AS FullName,
'SCHEMA' AS level0type,
OBJECT_SCHEMA_NAME(o.object_id) AS level0name,
CASE o.type_desc
WHEN 'VIEW' THEN 'VIEW'
WHEN 'USER_TABLE' THEN 'TABLE'
WHEN 'SQL_STORED_PROCEDURE' THEN 'PROCEDURE'
WHEN 'SQL_SCALAR_FUNCTION' THEN 'FUNCTION'
WHEN 'CLR_SCALAR_FUNCTION' THEN 'FUNCTION'
WHEN 'SQL_TABLE_VALUED_FUNCTION' THEN 'FUNCTION'
WHEN 'SQL_INLINE_TABLE_VALUED_FUNCTION' THEN 'FUNCTION'
ELSE o.type_desc
END AS level1type,
o.name AS level1Name,
SubObj.level2type,
SubObj.level2name
FROM sys.objects AS o
LEFT JOIN (
SELECT
parent_object_id, object_id, 'CONSTRAINT', name
FROM sys.objects
WHERE
is_ms_shipped =0 AND
parent_object_id <>0
UNION ALL
SELECT object_id, column_id, 'COLUMN', name FROM sys.columns UNION ALL
SELECT object_id, index_id, 'INDEX', name FROM sys.indexes UNION ALL
SELECT object_id, parameter_id, 'PARAMETER', name FROM sys.parameters
) AS SubObj(parent_id, sub_id, level2type, level2name)
ON o.object_id = SubObj.parent_id
WHERE
o.is_ms_shipped = 0 AND
o.parent_object_id = 0
UNION ALL
SELECT
s.schema_id,
0,
QUOTENAME(s.name),
'SCHEMA',
name,
NULL,
NULL,
NULL,
NULL
FROM sys.schemas AS s
UNION ALL
SELECT
0,
0,
QUOTENAME(DB_NAME()),
'DATABASE',
DB_NAME(),
NULL,
NULL,
NULL,
NULL
GO
/*
source: http://www.sqlservice.se/ssms-data-classification-part-2/
*/
SET XACT_ABORT OFF;
IF (OBJECT_ID('tempdb..##server_collect') IS NOT NULL)
EXEC('DROP TABLE ##server_collect;')
GO
CREATE TABLE ##server_collect (
[database_id] INT NOT NULL,
[object_id] INT NOT NULL,
[schema_name] SYSNAME NOT NULL,
[table_name] SYSNAME NOT NULL,
[column_name] SYSNAME NOT NULL,
[type_name] SYSNAME NOT NULL,
[max_length] SMALLINT NOT NULL,
[scale] SMALLINT NOT NULL,
[precision] SMALLINT NOT NULL,
[is_in_memory] BIT NULL
)
DECLARE @cmd NVARCHAR(4000) =N'
INSERT INTO ##server_collect
SELECT DISTINCT
DB_ID() AS database_id,
C.object_id,
S.NAME AS schema_name,
T.NAME AS table_name,
C.NAME AS column_name,
TP.name as type_name,
C.max_length,
C.scale,
c.precision,
COALESCE(OBJECTPROPERTY(t.object_id, ''TableIsMemoryOptimized''),CAST(0 AS BIT)) AS is_in_memory
FROM sys.schemas S
JOIN sys.tables T ON S.schema_id = T.schema_id
JOIN sys.columns C ON T.object_id = C.object_id
JOIN sys.types TP ON C.system_type_id = TP.system_type_id;
'
/** scan the server **/
--SET @cmd = N'use [?];'+CHAR(10)+CHAR(13)+@cmd;
--EXEC sys.sp_MSforeachdb @command1 = @cmd
--EXEC dbo.sp_foreachdb @command =@cmd, -- nvarchar(max)
-- @replacechar = N'?', -- nchar(1)
-- @print_dbname = 1, -- bit
-- --@print_command_only = 1, -- bit
-- @suppress_quotename = 1, -- bit
-- @user_only = 1
-- --, @name_pattern = N''
-- --, @database_list = N''
/** single database execute **/
EXEC sys.sp_executesql @cmd;
DECLARE @InfoTypeRanking TABLE (
info_type NVARCHAR(128) NOT NULL,
info_type_guid uniqueidentifier NOT NULL DEFAULT newid(),
ranking INT NOT NULL
);
DECLARE @Dictionary TABLE (
pattern NVARCHAR(128) NOT NULL,
info_type NVARCHAR(128) NOT NULL,
sensitivity_label NVARCHAR(128) NOT NULL,
can_be_numeric BIT NOT NULL DEFAULT 1
);
DECLARE @SensitivityLabel TABLE
(
sensitivity_label NVARCHAR(128) NOT NULL,
sensitivity_label_guid UNIQUEIDENTIFIER NOT NULL DEFAULT newid()
);
DECLARE @ClassifcationResults TABLE
(
[database_name] NVARCHAR(128),
[schema_name] NVARCHAR(128),
table_name NVARCHAR(128),
column_name NVARCHAR(128),
column_type_info NVARCHAR(256),
info_type NVARCHAR(128),
sensitivity_label NVARCHAR(128),
ranking INT,
can_be_numeric BIT,
is_in_memory BIT
);
/* If we get multiple matches we will apply the lowest ranking first
So if a column matches on both Name and CreditCard it will be classified as Name
Your priorities may differ from Microsoft's */
INSERT INTO @InfoTypeRanking (info_type, ranking)
VALUES
('Networking', 100),
('Contact Info', 200),
('Credentials', 300),
('Name', 400),
('National ID', 500),
('SSN', 600),
('Credit Card', 700),
('Banking', 800),
('Financial', 900),
('Health', 1000),
('Date Of Birth', 1100),
('Other', 1200);
INSERT INTO @Dictionary (pattern, info_type, sensitivity_label, can_be_numeric)
VALUES
('%name%', 'Name', 'GDPR', 0),
('%ssn%', 'SSN', 'PII', 1),
('%acct%', 'Financial', 'PII', 1),
('%rtno%', 'Financial', 'PII', 1),
('%dob%', 'Date Of Birth', 'PII',1)
/* add or adjust as necessary */
;
Insert @SensitivityLabel (sensitivity_label)
SELECT distinct sensitivity_label from @Dictionary;
INSERT INTO @ClassifcationResults
SELECT DISTINCT
DB_NAME(src.database_id) [database_name],
src.[schema_name],
src.[table_name],
src.[column_name],
CONCAT(src.[type_name] ,
'('+CASE src.[type_name]
WHEN 'NVARCHAR' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' )
WHEN 'VARCHAR' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' )
WHEN 'VARBINARY' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' )
WHEN 'char' THEN COALESCE( CAST(NULLIF(src.max_length,-1)AS VARCHAR), 'max' )
WHEN 'datetime2' THEN CAST(src.scale AS VARCHAR)
WHEN 'decimal' THEN CONCAT(src.precision,',', src.scale)
WHEN 'numeric' THEN CONCAT(src.precision,',', src.scale)
END+')') AS column_type_info,
D.info_type,
D.sensitivity_label,
R.ranking,
D.can_be_numeric,
src.is_in_memory
FROM ##server_collect AS src
LEFT JOIN @Dictionary D
ON (
D.pattern NOT LIKE '%[%]%'
AND LOWER(src.[column_name]) = LOWER(D.pattern) COLLATE DATABASE_DEFAULT
)
OR
(
D.pattern LIKE '%[%]%'
AND LOWER(src.[column_name]) LIKE LOWER(D.pattern) COLLATE DATABASE_DEFAULT
)
LEFT JOIN @InfoTypeRanking R ON (R.info_type = D.info_type)
WHERE
(D.info_type IS NOT NULL)
AND NOT (
D.can_be_numeric = 0
AND src.[type_name] IN ( 'bigint', 'bit', 'decimal', 'float', 'int', 'money', 'numeric', 'smallint', 'smallmoney', 'tinyint' )
);
SELECT
ROW_NUMBER() OVER (ORDER BY MR.schema_name, MR.table_name, MR.column_name) AS rnum,
CR.database_name,
MR.schema_name AS schema_name,
MR.table_name AS table_name,
MR.column_name AS column_name,
CR.is_in_memory AS IsInMemory,
CR.column_type_info AS column_type_info,
CR.info_type AS information_type_name,
CONVERT(VARCHAR(50), ITR.info_type_guid) AS info_type_guid,
CR.sensitivity_label AS sensitivity_label_name,
CONVERT(VARCHAR(50), SL.sensitivity_label_guid) AS sensitivity_label_guid
FROM (
SELECT schema_name,
table_name,
column_name,
MIN(ranking) AS min_ranking
FROM @ClassifcationResults
GROUP BY schema_name,
table_name,
column_name
) MR
INNER JOIN @ClassifcationResults CR
ON CR.schema_name = MR.schema_name
AND CR.table_name = MR.table_name
AND CR.column_name = MR.column_name
AND CR.ranking = MR.min_ranking
JOIN @InfoTypeRanking ITR ON ITR.info_type = CR.info_type
JOIN @SensitivityLabel SL ON SL.sensitivity_label = CR.sensitivity_label
ORDER BY
CR.database_name,MR.schema_name, MR.table_name, MR.column_name;
--drop table #result_defs
select
o.object_id as proc_id
,o.name as proc_name
,x.name
,x.column_ordinal
,x.is_nullable, x.system_type_id, x.precision, x.scale
into #result_defs
from sys.objects as o
cross apply sys.dm_exec_describe_first_result_set_for_object(o.object_id,1) as x
where
is_ms_shipped = 0 AND type = 'p'
and is_hidden =0;
create unique clustered index result_defs_uq on #result_defs(proc_id, column_ordinal);
SELECT
p.object_id,
p.name,
result.min_proc_obj as class_out,
input.min_proc_obj as class_in,
x.*
into #results
FROM sys.procedures as p
OUTER APPLY (
select top 1
y.*
from (
values
('insert', 1,0,0,0),
('get', 0,1,0,0),
('getall', 0,1,0,0),
('delete', 0, 0, 0, 1),
('update', 0,0,1,0),
('exists',null,null,null,null),
('count',null,null,null,null)
) as y(search, [create], [read], [update], [delete])
where
charindex(y.search, p.name)> 0
) as x
OUTER APPLY (
SELECT TOP 1
MIN(t2.proc_id) AS min_proc_obj
FROM #result_defs AS t2
WHERE
t2.proc_id <= p.object_id AND
EXISTS (SELECT NULL FROM #result_defs as t1
WHERE proc_id = p.object_id AND
t1.name = t2.name AND
t1.system_type_id = t2.system_type_id AND
t1.is_nullable = t2.is_nullable)
GROUP BY
t2.proc_id
HAVING
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = p.object_id )AND
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = t2.proc_id )
ORDER BY min_proc_obj
)as result
OUTER APPLY (
SELECT TOP 1
MIN(t2.proc_id) AS min_proc_obj
FROM #result_defs AS t2
WHERE
t2.proc_id <= p.object_id AND
EXISTS (SELECT NULL FROM (select object_id as proc_id, right(name,len(name)-1) as name, system_type_id from sys.parameters) as t1
WHERE proc_id = p.object_id AND
t1.name = t2.name AND
t1.system_type_id = t2.system_type_id )
GROUP BY
t2.proc_id
HAVING
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = p.object_id )AND
COUNT(*) = (select COUNT(*) from #result_defs where proc_id = t2.proc_id )
ORDER BY min_proc_obj
) as input
WHERE
p.is_ms_shipped = 0 AND
p.name not like 'sp%'
ORDER BY
p.name, p.object_id
select
object_name(class_out), COUNT(*)
from #results
where
class_out is not null
and exists (
select NULL from #result_defs
where proc_id = #results.object_id
having COUNT(*)>1
)
group by class_out
having COUNT(*) > 1
order by 2 DESC
with meh as (
select
class_out,
COUNT(DISTINCT r.object_id) as use_count
FROM #results AS r
JOIN #result_defs def on object_id = proc_id
where
class_out is not null
group by
class_out
having
COUNT(DISTINCT r.object_id)>1 AND
COUNT(def.column_ordinal) > COUNT(DISTINCT r.object_id)
)
select
*
from
SELECT
s.object_id AS [TableID],
tbl.name AS [TableName],
sc.column_id AS ColumnID,
c.name as ColumnName,
s.stats_id,
s.name AS StatName,
sc.stats_column_id,
i.index_id,
i.name AS IdxName,
sp.last_updated,
s.auto_created,
sp.rows,
sp.rows_sampled,
sp.unfiltered_rows,
sp.modification_counter,
sp.steps,
sp.persisted_sample_percent,
i.is_hypothetical
--, i.is_ignored_in_optimization
FROM sys.tables AS tbl
JOIN sys.columns AS c ON c.object_id=tbl.object_id
LEFT JOIN (
sys.stats AS s
JOIN sys.stats_columns AS sc ON sc.object_id = s.object_id AND sc.stats_id = s.stats_id
) ON
s.object_id = tbl.object_id AND sc.column_id = c.column_id
LEFT JOIN sys.indexes AS i ON i.object_id = tbl.object_id AND s.stats_id = i.index_id
CROSS APPLY sys.dm_db_stats_properties(s.object_id, s.stats_id) sp
ORDER BY
[TableName],s.stats_id,sc.stats_column_id
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment