Last active
April 26, 2019 21:45
-
-
Save isapir/631c33fbb9258d9cc59e82d9336b2676 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** Create test table */ | |
drop table if exists dupes; | |
create temporary table dupes(word text, num int, id int); | |
/** Add test data with duplicates */ | |
insert into dupes(word, num, id) | |
values ('aaa', 100, 1) | |
,('bbb', 200, 2) | |
,('ccc', 300, 3) | |
,('bbb', 400, 4) | |
,('bbb', 200, 5) -- duplicate | |
,('ccc', 300, 6) -- duplicate | |
,('ddd', 400, 7) | |
,('bbb', 400, 8) -- duplicate | |
,('aaa', 100, 9) -- duplicate | |
,('ccc', 300, 10); -- duplicate | |
/** Show all the data */ | |
select * | |
from dupes | |
order by word, num, id; | |
/* | |
word|num|id| | |
----|---|--| | |
aaa |100| 1| | |
aaa |100| 9| | |
bbb |200| 2| | |
bbb |200| 5| | |
bbb |400| 4| | |
bbb |400| 8| | |
ccc |300| 3| | |
ccc |300| 6| | |
ccc |300|10| | |
ddd |400| 7| | |
*/ | |
/** Use DISTINCT to show only the unique word, num records */ | |
select distinct word, num | |
from dupes | |
order by word, num; | |
/* | |
word|num| | |
----|---| | |
aaa |100| | |
bbb |200| | |
bbb |400| | |
ccc |300| | |
ddd |400| | |
*/ | |
/** Use GROUP BY to select unique records with min, max, and count aggregates */ | |
select word, num, min(id), max(id), count(*) | |
from dupes | |
group by word, num | |
order by word, num; | |
/* | |
word|num|min|max|count| | |
----|---|---|---|-----| | |
aaa |100| 1| 9| 2| | |
bbb |200| 2| 5| 2| | |
bbb |400| 4| 8| 2| | |
ccc |300| 3| 10| 3| | |
ddd |400| 7| 7| 1| | |
*/ | |
/** Find the higher id values of duplicates, distinct only added for clarity */ | |
select distinct d2.id | |
from dupes d1 | |
inner join dupes d2 on d2.word=d1.word and d2.num=d1.num | |
where d2.id > d1.id | |
/* | |
id| | |
--| | |
5| | |
6| | |
8| | |
9| | |
10| | |
*/ | |
/** Use the previous query in a subquery to exclude the dupliates with higher id values */ | |
select * | |
from dupes | |
where id not in ( | |
select d2.id | |
from dupes d1 | |
inner join dupes d2 on d2.word=d1.word and d2.num=d1.num | |
where d2.id > d1.id | |
) | |
order by word, num; | |
/* | |
word|num|id| | |
----|---|--| | |
aaa |100| 1| | |
bbb |200| 2| | |
bbb |400| 4| | |
ccc |300| 3| | |
ddd |400| 7| | |
*/ | |
/** Use a Common Table Expression with the ROW_NUMBER() Window Function and select only the rows with row_num=1 */ | |
with cte as ( | |
select * | |
,row_number() over(partition by word, num order by id) as row_num | |
from dupes | |
) | |
select word, num, id | |
from cte | |
where row_num = 1 | |
order by word, num; | |
/* | |
word|num|id| | |
----|---|--| | |
aaa |100| 1| | |
bbb |200| 2| | |
bbb |400| 4| | |
ccc |300| 3| | |
ddd |400| 7| | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment