Skip to content

Instantly share code, notes, and snippets.

@ChadJPetersen
Created April 2, 2015 21:12
Show Gist options
  • Save ChadJPetersen/4086cbde73ae12928423 to your computer and use it in GitHub Desktop.
Save ChadJPetersen/4086cbde73ae12928423 to your computer and use it in GitHub Desktop.
script in Microsoft Transact SQL (T-SQL) that offsets the data in the given columns by a random amount (data obfuscate).
--Chad J Petersen
--April 2, 2015
--Microsoft TSQL (transact SQL) 2008 R2
--offset/scramble Selected rows in a DB by a random number.
--add values to the @dictionary values to scramble columns with that name everywhere in the DB.
--UNAVOIDABLE RESTRAINT:: WILL BREAK IF A TABLE THAT NEEDS SCRAMBLING HAS MORE THAN (9,023,372,036,854,775,807) ROWS;
--PARAMETERS
--Create dictionary table of columns to offset/scramble.
DECLARE @Dictionary TABLE (Word VARCHAR(500));
INSERT INTO @Dictionary
VALUES ('SSN'),
('FirstName'),
('First Name'),
('MiddleName'),
('Middle Name'),
('LastName'),
('Last Name'),
('DOB'),
('DateOfBirth'),
('Date Of Birth'),
('DOD'),
('DateOfDeath'),
('Date Of Death');
--END PARAMETERS
--START QUERY
--create the varchar that will hold the dynamic sql
DECLARE @DSQL VARCHAR(MAX);
SET @DSQL = ' ';
--used to name temp tables so they do not overlap
DECLARE @TempNumber BIGINT;
SET @TempNumber = (
SELECT 9999999999 * RAND()
);
--used to store the primary key of the table being worked on
DECLARE @TablePrimaryKey VARCHAR(500);
SET @TablePrimaryKey = '';
--used to offset the rows by a random number
DECLARE @Offset BIGINT;
SET @Offset = (
SELECT 9999999999 * RAND()
);
--Create a cursor that will load each table with a column in dictionary and offset it by a random value.
DECLARE @TableName VARCHAR(500)
DECLARE TableName_Cursor CURSOR
FOR
--get a list of table names that have the columns that are specified in the dictionary
SELECT DISTINCT (TABLE_SCHEMA + '.' + TABLE_NAME)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME IN (
SELECT Word
FROM @Dictionary
)
OPEN TableName_Cursor
FETCH NEXT
FROM TableName_Cursor
INTO @TableName
WHILE @@FETCH_STATUS = 0
BEGIN
--get the primary key for this current table
SET @TablePrimaryKey = (
SELECT TOP 1 ccu.COLUMN_NAME
FROM INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
INNER JOIN INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE ccu
ON tc.CONSTRAINT_NAME = ccu.Constraint_name
WHERE tc.CONSTRAINT_TYPE = 'Primary Key'
AND (tc.TABLE_SCHEMA + '.' + tc.TABLE_NAME) = @TableName
)
--get a cursor for iterating through the words in the dictionary
DECLARE @Word VARCHAR(500)
DECLARE Word_Cursor CURSOR
FOR
(
SELECT Word
FROM @Dictionary
)
OPEN Word_Cursor
FETCH NEXT
FROM Word_Cursor
INTO @Word
WHILE @@FETCH_STATUS = 0
BEGIN
SET @TempNumber = @TempNumber + 1
IF EXISTS (
SELECT *
FROM INFORMATION_SCHEMA.COLUMNS
WHERE (TABLE_SCHEMA + '.' + TABLE_NAME) = @TableName
AND COLUMN_NAME = @Word
)
BEGIN
SET @DSQL = '
DECLARE @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' BIGINT
SET @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' = 0
SELECT *, ROW_NUMBER() OVER(ORDER BY ' + @TablePrimaryKey + ') AS RN
INTO #temp' + CAST(@TempNumber AS VARCHAR(20)) + '
FROM ' + @TableName + '
SET @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ' = (SELECT MAX(RN) FROM #temp' + CAST(@TempNumber AS VARCHAR(20)) + ')
DECLARE @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ' BIGINT
SET @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ' = CAST((' + CAST(@Offset AS VARCHAR(20)) + ' * RAND(' + CASE
--this section keeps dates in the same offset so birth date - death date etc end up in the same row to keep intervals constant
WHEN @Word LIKE '%date%'
OR @Word IN (
'dob',
'DOB',
'dod',
'DOD'
)
THEN '0.5'
ELSE ''
END + ')) AS BIGINT) % @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + '
DECLARE @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + ' BIGINT
DECLARE PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor CURSOR
FOR
SELECT ' + @TablePrimaryKey + '
FROM ' + @TableName + '
OPEN PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor
FETCH NEXT
FROM PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor
INTO @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '
WHILE @@FETCH_STATUS = 0
BEGIN
UPDATE ' + @TableName + '
SET ' + @Word + ' = (
SELECT TOP 1 T.' + @Word + '
FROM #temp' + CAST(@TempNumber AS VARCHAR(20)) + ' AS T
WHERE T.RN = (
SELECT TOP 1 CASE
WHEN ((TT.RN + @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ') > @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ')
THEN
((TT.RN + @TBLOffset' + CAST(@TempNumber AS VARCHAR(20)) + ') - @RowNum' + CAST(@TempNumber AS VARCHAR(20)) + ')
ELSE
(TT.RN + @TBLOffset' +
CAST(@TempNumber AS VARCHAR(20)) + ')
END
FROM #temp' + CAST(@TempNumber AS VARCHAR(20)) + ' AS TT
WHERE TT.' + @TablePrimaryKey + ' = @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '
)
)
WHERE ' + @TablePrimaryKey + ' = @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '
FETCH NEXT
FROM PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor
INTO @PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '
END
CLOSE PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor
DEALLOCATE PrimKey' + CAST(@TempNumber AS VARCHAR(20)) + '_Cursor
DROP TABLE #temp' + CAST(@TempNumber AS VARCHAR(20)) + '
'
--PRINT (@DSQL) --Uncomment print and comment exec to see the sql that is being generated.
EXEC (@DSQL)
END
FETCH NEXT
FROM Word_Cursor
INTO @Word
END
CLOSE Word_Cursor
DEALLOCATE Word_Cursor
FETCH NEXT
FROM TableName_Cursor
INTO @TableName
END
CLOSE TableName_Cursor
DEALLOCATE TableName_Cursor
--END QUERY
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment