Skip to content

Instantly share code, notes, and snippets.

@isapir
Last active April 26, 2019 21:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save isapir/631c33fbb9258d9cc59e82d9336b2676 to your computer and use it in GitHub Desktop.
Save isapir/631c33fbb9258d9cc59e82d9336b2676 to your computer and use it in GitHub Desktop.
/** Create test table */
drop table if exists dupes;
create temporary table dupes(word text, num int, id int);
/** Add test data with duplicates */
insert into dupes(word, num, id)
values ('aaa', 100, 1)
,('bbb', 200, 2)
,('ccc', 300, 3)
,('bbb', 400, 4)
,('bbb', 200, 5) -- duplicate
,('ccc', 300, 6) -- duplicate
,('ddd', 400, 7)
,('bbb', 400, 8) -- duplicate
,('aaa', 100, 9) -- duplicate
,('ccc', 300, 10); -- duplicate
/** Show all the data */
select *
from dupes
order by word, num, id;
/*
word|num|id|
----|---|--|
aaa |100| 1|
aaa |100| 9|
bbb |200| 2|
bbb |200| 5|
bbb |400| 4|
bbb |400| 8|
ccc |300| 3|
ccc |300| 6|
ccc |300|10|
ddd |400| 7|
*/
/** Use DISTINCT to show only the unique word, num records */
select distinct word, num
from dupes
order by word, num;
/*
word|num|
----|---|
aaa |100|
bbb |200|
bbb |400|
ccc |300|
ddd |400|
*/
/** Use GROUP BY to select unique records with min, max, and count aggregates */
select word, num, min(id), max(id), count(*)
from dupes
group by word, num
order by word, num;
/*
word|num|min|max|count|
----|---|---|---|-----|
aaa |100| 1| 9| 2|
bbb |200| 2| 5| 2|
bbb |400| 4| 8| 2|
ccc |300| 3| 10| 3|
ddd |400| 7| 7| 1|
*/
/** Find the higher id values of duplicates, distinct only added for clarity */
select distinct d2.id
from dupes d1
inner join dupes d2 on d2.word=d1.word and d2.num=d1.num
where d2.id > d1.id
/*
id|
--|
5|
6|
8|
9|
10|
*/
/** Use the previous query in a subquery to exclude the dupliates with higher id values */
select *
from dupes
where id not in (
select d2.id
from dupes d1
inner join dupes d2 on d2.word=d1.word and d2.num=d1.num
where d2.id > d1.id
)
order by word, num;
/*
word|num|id|
----|---|--|
aaa |100| 1|
bbb |200| 2|
bbb |400| 4|
ccc |300| 3|
ddd |400| 7|
*/
/** Use a Common Table Expression with the ROW_NUMBER() Window Function and select only the rows with row_num=1 */
with cte as (
select *
,row_number() over(partition by word, num order by id) as row_num
from dupes
)
select word, num, id
from cte
where row_num = 1
order by word, num;
/*
word|num|id|
----|---|--|
aaa |100| 1|
bbb |200| 2|
bbb |400| 4|
ccc |300| 3|
ddd |400| 7|
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment