Created
March 8, 2017 14:12
-
-
Save AndyConlisk/7aea664c1b73ac0ef9825b4383e68dc9 to your computer and use it in GitHub Desktop.
Finding and removing duplicate rows from database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- SQL databate table dbo.Test with columns Id, FirstName, LastName, TransactionNumber, LastUpdatedDate | |
-- Select rows that are duplicates without using LastUpdatedDate | |
-- If needed can insert into a backup/archive table | |
SELECT t.Id, t.FirstName, t.LastName, t.TransactionNumber, t.LastUpdatedDate | |
FROM dbo.Test t | |
LEFT OUTER JOIN ( | |
SELECT MIN(Id) as Id, | |
FirstName, LastName, TransactionNumber | |
FROM dbo.Test | |
GROUP BY FirstName, LastName, TransactionNumber | |
) as keepRows | |
ON t.Id = keepRows.Id | |
WHERE keepRows.Id IS NULL | |
-- Delete the duplicate rows | |
DELETE FROM dbo.Test | |
FROM dbo.Test t | |
LEFT OUTER JOIN ( | |
SELECT MIN(Id) as Id, | |
FirstName, LastName, TransactionNumber | |
FROM dbo.Test | |
GROUP BY FirstName, LastName, TransactionNumber | |
) as keepRows | |
ON t.Id = keepRows.Id | |
WHERE keepRows.Id IS NULL |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment