Skip to content

Instantly share code, notes, and snippets.

@AdilHindistan
Last active August 29, 2015 13:58
Show Gist options
  • Save AdilHindistan/9979382 to your computer and use it in GitHub Desktop.
Save AdilHindistan/9979382 to your computer and use it in GitHub Desktop.
##AH - AdilHindistan - 2014-04-04
## See bottom for more efficient way to delete dupes
-- Technique to detect duplicate data in rows
select dubCol1,dubCol2,dubCol3,Count(*)
from dupTable
group by dubCol1,dubCol2,dubCol3
having count(*)>1
-- Technique to delete duplicate rows, if there is a column that's unique,
delete
from dupTable
where uniquecol NOT IN (
select max(uniqueCol)
from dupTable
group by dupCol1,dupCol2,dupCol3
)
-- Example:
--Create a temp table to hold logon data
create table #temp123
(
MachineName nvarchar(50),
UserName nvarchar(50),
LogonDate datetime,
RecordDate datetime
)
-- find out duplicates and put them into a temp table
insert into #temp123
select MachineName,UserName,LogonDate,count(*) as Count
from dbo.logon
group by MachineName,UserName,LogonDate
having count(*)>1
order by count
-- delete duplicate logon info (in this case computername,username,logondate are duplicate. ie. got recorded several times)
-- but record Date is unique
delete
from dbo.logon
where RecordDate not in (
select max(recorddate)
from dbo.logon
group by MachineName,UserName,LogonDate
)
## Recommendations from Mesut Gunes
## Test data by creating temp table but use not locking SELECT INTO
select MachineName,UserName,LogonDate,RecordDate
into #temp123
from dbo.logon
where 1=2
## 'NOT IN' is slow as it scans the whole table. Use EXISTS or NOT EXISTS instead, as it uses Index
insert into #temp123
select MachineName,UserName,LogonDate,RecordDate
from dbo.logon a
where EXISTS (
select 1
from dbo.logon
where MachineName=a.MachineName and UserName=a.UserName and LogonDate=a.LogonDate AND RecordDate<a.RecordDate
)
## SubQuery above finds the duplicate records and then returns the one with smaller (prior) RecordDate
## If we wanted to find the duplicate records we will keep, we need to use: RecordDate >a.RecordDate
-- where MachineName=a.MachineName and UserName=a.UserName and LogonDate=a.LogonDate and RecordDate > a.RecordDate
## Now delete the records
begin tran t1 --for rollback
delete
from dbo.Logon
where exists (
select 1
from #temp123
where MachineName=dbo.logon.MachineName and UserName=dbo.logon.UserName and LogonDate=dbo.logon.LogonDate and RecordDate=dbo.logon.RecordDate
)
commit tran t1 -- if successful
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment