Skip to content

Instantly share code, notes, and snippets.

@Tyriar
Created July 17, 2014 13:18
Show Gist options
  • Star 21 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save Tyriar/d3635c6b6e32ac406623 to your computer and use it in GitHub Desktop.
Save Tyriar/d3635c6b6e32ac406623 to your computer and use it in GitHub Desktop.
Various T-SQL database obfuscation scripts
-- randomise a column's values using a list of specified values
IF EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'ShuffleTable')
DROP TABLE [ShuffleTable];
IF EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'InsertDataListTable')
DROP TABLE [InsertDataListTable];
GO
CREATE TABLE [InsertDataListTable] ([TargetColumn] NVARCHAR(50));
INSERT INTO [InsertDataListTable] ([TargetColumn]) VALUES('Value 1');
-- ...
-- ...
-- ...
INSERT INTO [InsertDataListTable] ([TargetColumn]) VALUES('Value n');
DECLARE @SrcRecCount AS INTEGER;
DECLARE @SubstRecCount AS INTEGER;
DECLARE @CopiesOfFullSet AS INTEGER;
SELECT @SrcRecCount = COUNT(*) FROM [TargetTable];
SELECT @SubstRecCount = COUNT(*) FROM [InsertDataListTable];
SET @CopiesOfFullSet = @SrcRecCount / @SubstRecCount;
CREATE TABLE [ShuffleTable] (Id INT IDENTITY(1,1), [TargetColumn] NVARCHAR(50));
DECLARE @i AS INTEGER;
SET @i = 0;
WHILE @i < @CopiesOfFullSet
BEGIN
INSERT INTO [ShuffleTable]
SELECT [TargetColumn]
FROM [InsertDataListTable]
ORDER BY NEWID();
SET @i = @i + 1;
END
INSERT INTO [ShuffleTable]
SELECT TOP (@SrcRecCount - @SubstRecCount * @CopiesOfFullSet) [TargetColumn]
FROM [InsertDataListTable]
ORDER BY NEWID();
UPDATE src
SET src.[TargetColumn] = shuffled.[TargetColumn]
FROM
(SELECT ROW_NUMBER() OVER(ORDER BY [TargetColumn] DESC) AS 'RowNumber', *
FROM [TargetTable]) AS src
INNER JOIN [ShuffleTable] AS shuffled
ON shuffled.Id = src.RowNumber
-- Cleanup
DROP TABLE [ShuffleTable];
DROP TABLE [InsertDataListTable];
GO
-- randomise a column's values to a specific integer range
-- Note this won't work for phone numbers as it will enter scientific notation
UPDATE [TargetTable]
SET [TargetColumn] = MIN_VALUE + FLOOR((CAST(ABS(CHECKSUM(NEWID())) AS FLOAT) / 2147483648) * (MAX_VALUE - MIN_VALUE))
GO
-- set a column's values as a fixed string
-- 50 = [TargetColumn] size
UPDATE [TargetTable]
SET [TargetColumn] = 'new string'
WHERE [TargetColumn] IS NOT NULL
GO
-- set a column's values as a fixed string plus the row number
-- eg. 'string-1', 'string-2', ...
UPDATE t
SET t.[TargetColumn] = LEFT('string' + CAST(t.RowNumber AS VARCHAR(8)), 50)
FROM
(SELECT ROW_NUMBER() OVER (ORDER BY [TargetColumn]) AS RowNumber, *
FROM [TargetTable]) AS t
GO
-- Shuffle the records in a table with a composite primary key
-- Keys: [Key1] INT, [Key2] INT
-- * Can be any type provided the pair are unique
-- * Doesn't need to be a primary key
-- Target column: [TargetColumn] VARCHAR(50)
-- * Ensure type is correct
IF EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'ShuffleTable')
DROP TABLE [ShuffleTable];
IF EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'TempTable')
DROP TABLE [TempTable];
GO
CREATE TABLE [ShuffleTable] (
[RowNumber] int IDENTITY(1,1),
[Key1] INT,
[Key2] INT,
[ShuffleColumn] varchar(50)
);
INSERT [ShuffleTable] ( [Key1], [Key2], [ShuffleColumn] )
SELECT [Key1], [Key2], [ShuffleColumn]
FROM [TargetTable] ORDER BY NEWID();
CREATE TABLE [TempTable] (
[RowNumber] int IDENTITY(1,1),
[Key1] INT,
[Key2] INT
);
INSERT [TempTable] ([Key1], [Key2])
SELECT [Key1], [Key2]
FROM [TargetTable];
UPDATE [src]
SET [src].[ShuffleColumn] = [shuffle].[ShuffleColumn]
FROM [TargetTable] AS [src]
INNER JOIN [TempTable] temp
ON [src].[Key1] = [temp].[Key1] AND
[src].[Key2] = [temp].[Key2]
INNER JOIN [ShuffleTable] AS shuffle
ON [shuffle].[RowNumber] = [temp].[RowNumber];
DROP TABLE [ShuffleTable]
DROP TABLE [TempTable]
GO
-- Shuffle the records in a table with a single unique key
-- Key: [Id] INT
-- * Can be any type provided it's unique
-- * Doesn't need to be a primary key
-- Target column: [TargetColumn] VARCHAR(50)
-- * Ensure type is correct
IF EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'ShuffleTable')
DROP TABLE [ShuffleTable];
IF EXISTS (SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = 'TempTable')
DROP TABLE [TempTable];
GO
CREATE TABLE [ShuffleTable] (
[RowNumber] INT IDENTITY(1,1),
[Id] INT,
[TargetColumn] VARCHAR(50)
);
INSERT [ShuffleTable] ( [Id], [TargetColumn] )
SELECT [Id], [TargetColumn]
FROM [TargetTable] ORDER BY NEWID();
CREATE TABLE [TempTable] (
[RowNumber] INT IDENTITY(1,1),
[Id] INT
);
INSERT [TempTable] ([Id])
SELECT [Id]
FROM [TargetTable];
UPDATE [src]
SET [src].[TargetColumn] = [shuffle].[TargetColumn]
FROM [TargetTable] AS [src]
INNER JOIN [TempTable] [temp]
ON [src].[Id] = [temp].[Id]
INNER JOIN [ShuffleTable] AS [shuffle]
ON [shuffle].[RowNumber] = [temp].[RowNumber];
DROP TABLE [ShuffleTable]
DROP TABLE [TempTable]
GO
-- transform a column's values by adding a x day date variance
-- 30 = day variance
UPDATE [TargetTable]
SET [TargetColumn] = DATEADD(DAY, ((CAST(ABS(CHECKSUM(NEWID())) AS FLOAT) / 2147483648) * 30 * 2 - 30), [TargetColumn]);
GO
-- transform a column's values by adding a percentage variance
-- 0.25 = numeric variance
UPDATE [TargetTable]
SET [TargetColumn] = [TargetColumn] * (1+(CAST(ABS(CHECKSUM(NEWID())) AS FLOAT) / 2147483648) * 0.25 * 2 - 0.25);
GO
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment