Last active
August 29, 2015 13:58
-
-
Save AdilHindistan/9979382 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##AH - AdilHindistan - 2014-04-04 | |
## See bottom for more efficient way to delete dupes | |
-- Technique to detect duplicate data in rows | |
select dubCol1,dubCol2,dubCol3,Count(*) | |
from dupTable | |
group by dubCol1,dubCol2,dubCol3 | |
having count(*)>1 | |
-- Technique to delete duplicate rows, if there is a column that's unique, | |
delete | |
from dupTable | |
where uniquecol NOT IN ( | |
select max(uniqueCol) | |
from dupTable | |
group by dupCol1,dupCol2,dupCol3 | |
) | |
-- Example: | |
--Create a temp table to hold logon data | |
create table #temp123 | |
( | |
MachineName nvarchar(50), | |
UserName nvarchar(50), | |
LogonDate datetime, | |
RecordDate datetime | |
) | |
-- find out duplicates and put them into a temp table | |
insert into #temp123 | |
select MachineName,UserName,LogonDate,count(*) as Count | |
from dbo.logon | |
group by MachineName,UserName,LogonDate | |
having count(*)>1 | |
order by count | |
-- delete duplicate logon info (in this case computername,username,logondate are duplicate. ie. got recorded several times) | |
-- but record Date is unique | |
delete | |
from dbo.logon | |
where RecordDate not in ( | |
select max(recorddate) | |
from dbo.logon | |
group by MachineName,UserName,LogonDate | |
) | |
## Recommendations from Mesut Gunes | |
## Test data by creating temp table but use not locking SELECT INTO | |
select MachineName,UserName,LogonDate,RecordDate | |
into #temp123 | |
from dbo.logon | |
where 1=2 | |
## 'NOT IN' is slow as it scans the whole table. Use EXISTS or NOT EXISTS instead, as it uses Index | |
insert into #temp123 | |
select MachineName,UserName,LogonDate,RecordDate | |
from dbo.logon a | |
where EXISTS ( | |
select 1 | |
from dbo.logon | |
where MachineName=a.MachineName and UserName=a.UserName and LogonDate=a.LogonDate AND RecordDate<a.RecordDate | |
) | |
## SubQuery above finds the duplicate records and then returns the one with smaller (prior) RecordDate | |
## If we wanted to find the duplicate records we will keep, we need to use: RecordDate >a.RecordDate | |
-- where MachineName=a.MachineName and UserName=a.UserName and LogonDate=a.LogonDate and RecordDate > a.RecordDate | |
## Now delete the records | |
begin tran t1 --for rollback | |
delete | |
from dbo.Logon | |
where exists ( | |
select 1 | |
from #temp123 | |
where MachineName=dbo.logon.MachineName and UserName=dbo.logon.UserName and LogonDate=dbo.logon.LogonDate and RecordDate=dbo.logon.RecordDate | |
) | |
commit tran t1 -- if successful |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment