Created
April 2, 2015 21:12
-
-
Save ChadJPetersen/4086cbde73ae12928423 to your computer and use it in GitHub Desktop.
script in Microsoft Transact SQL (T-SQL) that offsets the data in the given columns by a random amount (data obfuscate).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--Chad J Petersen | |
--April 2, 2015 | |
--Microsoft TSQL (transact SQL) 2008 R2 | |
--offset/scramble Selected rows in a DB by a random number. | |
--add values to the @dictionary values to scramble columns with that name everywhere in the DB. | |
--UNAVOIDABLE RESTRAINT:: WILL BREAK IF A TABLE THAT NEEDS SCRAMBLING HAS MORE THAN (9,023,372,036,854,775,807) ROWS; | |
--PARAMETERS | |
--Create dictionary table of columns to offset/scramble. | |
DECLARE @Dictionary TABLE (Word VARCHAR(500)); | |
INSERT INTO @Dictionary | |
VALUES ('SSN'), | |
('FirstName'), | |
('First Name'), | |
('MiddleName'), | |
('Middle Name'), | |
('LastName'), | |
('Last Name'), | |
('DOB'), | |
('DateOfBirth'), | |
('Date Of Birth'), | |
('DOD'), | |
('DateOfDeath'), | |
('Date Of Death'); | |
--END PARAMETERS | |
--START QUERY | |
--create the varchar that will hold the dynamic sql | |
DECLARE @DSQL VARCHAR(MAX); | |
SET @DSQL = ' '; | |
--used to name temp tables so they do not overlap | |
DECLARE @TempNumber BIGINT; | |
SET @TempNumber = ( | |
SELECT 9999999999 * RAND() | |
); | |
--used to store the primary key of the table being worked on | |
DECLARE @TablePrimaryKey VARCHAR(500); | |
SET @TablePrimaryKey = ''; | |
--used to offset the rows by a random number | |
DECLARE @Offset BIGINT; | |
SET @Offset = ( | |
SELECT 9999999999 * RAND() | |
); | |
--Create a cursor that will load each table with a column in dictionary and offset it by a random value. | |
DECLARE @TableName VARCHAR(500) | |
DECLARE TableName_Cursor CURSOR | |
FOR | |
--get a list of table names that have the columns that are specified in the dictionary | |
SELECT DISTINCT (TABLE_SCHEMA + '.' + TABLE_NAME) | |
FROM INFORMATION_SCHEMA.COLUMNS | |
WHERE COLUMN_NAME IN ( | |
SELECT Word | |
FROM @Dictionary | |
) | |
OPEN TableName_Cursor | |
FETCH NEXT | |
FROM TableName_Cursor | |
INTO @TableName | |
WHILE @@FETCH_STATUS = 0 | |
BEGIN | |
--get the primary key for this current table | |
SET @TablePrimaryKey = ( | |
SELECT TOP 1 ccu.COLUMN_NAME | |
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc | |
INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE ccu | |
ON tc.CONSTRAINT_NAME = ccu.Constraint_name | |
WHERE tc.CONSTRAINT_TYPE = 'Primary Key' | |
AND (tc.TABLE_SCHEMA + '.' + tc.TABLE_NAME) = @TableName | |
) | |
--get a cursor for iterating through the words in the dictionary | |
DECLARE @Word VARCHAR(500) | |
DECLARE Word_Cursor CURSOR | |
FOR | |
( | |
SELECT Word | |
FROM @Dictionary | |
) | |
OPEN Word_Cursor | |
FETCH NEXT | |
FROM Word_Cursor | |
INTO @Word | |
WHILE @@FETCH_STATUS = 0 | |
BEGIN | |
SET @TempNumber = @TempNumber + 1 | |
IF EXISTS ( | |
SELECT * | |
FROM INFORMATION_SCHEMA.COLUMNS | |
WHERE (TABLE_SCHEMA + '.' + TABLE_NAME) = @TableName | |
AND COLUMN_NAME = @Word | |
) | |
BEGIN | |
SET @DSQL = ' | |
DECLARE @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' BIGINT | |
SET @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' = 0 | |
SELECT *, ROW_NUMBER() OVER(ORDER BY ' + @TablePrimaryKey + ') AS RN | |
INTO #temp' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
FROM ' + @TableName + ' | |
SET @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' = (SELECT MAX(RN) FROM #temp' + CAST(@TempNumber AS VARCHAR(20)) + ') | |
DECLARE @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ' BIGINT | |
SET @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ' = CAST((' + CAST(@Offset AS VARCHAR(20)) + ' * RAND(' + CASE | |
--this section keeps dates in the same offset so birth date - death date etc end up in the same row to keep intervals constant | |
WHEN @Word LIKE '%date%' | |
OR @Word IN ( | |
'dob', | |
'DOB', | |
'dod', | |
'DOD' | |
) | |
THEN '0.5' | |
ELSE '' | |
END + ')) AS BIGINT) % @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
DECLARE @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + ' BIGINT | |
DECLARE PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor CURSOR | |
FOR | |
SELECT ' + @TablePrimaryKey + ' | |
FROM ' + @TableName + ' | |
OPEN PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor | |
FETCH NEXT | |
FROM PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor | |
INTO @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
WHILE @@FETCH_STATUS = 0 | |
BEGIN | |
UPDATE ' + @TableName + ' | |
SET ' + @Word + ' = ( | |
SELECT TOP 1 T.' + @Word + ' | |
FROM #temp' + CAST(@TempNumber AS VARCHAR(20)) + ' AS T | |
WHERE T.RN = ( | |
SELECT TOP 1 CASE | |
WHEN ((TT.RN + @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ') > @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ') | |
THEN | |
((TT.RN + @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ') - @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ') | |
ELSE | |
(TT.RN + @TBLOffset' + | |
CAST(@TempNumber AS VARCHAR(20)) + ') | |
END | |
FROM #temp' + CAST(@TempNumber AS VARCHAR(20)) + ' AS TT | |
WHERE TT.' + @TablePrimaryKey + ' = @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
) | |
) | |
WHERE ' + @TablePrimaryKey + ' = @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
FETCH NEXT | |
FROM PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor | |
INTO @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
END | |
CLOSE PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor | |
DEALLOCATE PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor | |
DROP TABLE #temp' + CAST(@TempNumber AS VARCHAR(20)) + ' | |
' | |
--PRINT (@DSQL) --Uncomment print and comment exec to see the sql that is being generated. | |
EXEC (@DSQL) | |
END | |
FETCH NEXT | |
FROM Word_Cursor | |
INTO @Word | |
END | |
CLOSE Word_Cursor | |
DEALLOCATE Word_Cursor | |
FETCH NEXT | |
FROM TableName_Cursor | |
INTO @TableName | |
END | |
CLOSE TableName_Cursor | |
DEALLOCATE TableName_Cursor | |
--END QUERY |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment