isapir/unique-records.sql

## unique-records.sql
/** Create test table */
drop table if exists dupes;
create temporary table dupes(word text, num int, id int);

/** Add test data with duplicates */
insert into dupes(word, num, id)
values     ('aaa', 100, 1)
	  ,('bbb', 200, 2)
	  ,('ccc', 300, 3)
	  ,('bbb', 400, 4)
	  ,('bbb', 200, 5)     -- duplicate
	  ,('ccc', 300, 6)     -- duplicate
	  ,('ddd', 400, 7)
	  ,('bbb', 400, 8)     -- duplicate
	  ,('aaa', 100, 9)     -- duplicate
	  ,('ccc', 300, 10);   -- duplicate

/** Show all the data */
select 	*
from 	dupes
order by word, num, id;

/*
word|num|id|
----|---|--|
aaa |100| 1|
aaa |100| 9|
bbb |200| 2|
bbb |200| 5|
bbb |400| 4|
bbb |400| 8|
ccc |300| 3|
ccc |300| 6|
ccc |300|10|
ddd |400| 7|
*/

/** Use DISTINCT to show only the unique word, num records */
select distinct word, num
from 	dupes
order by word, num;

/*
word|num|
----|---|
aaa |100|
bbb |200|
bbb |400|
ccc |300|
ddd |400|
*/

/** Use GROUP BY to select unique records with min, max, and count aggregates */
select 	word, num, min(id), max(id), count(*)
from 	dupes
group by word, num
order by word, num;

/*
word|num|min|max|count|
----|---|---|---|-----|
aaa |100|  1|  9|    2|
bbb |200|  2|  5|    2|
bbb |400|  4|  8|    2|
ccc |300|  3| 10|    3|
ddd |400|  7|  7|    1|
*/

/** Find the higher id values of duplicates, distinct only added for clarity */
	select  distinct d2.id
	from 	dupes d1
		inner join dupes d2 on d2.word=d1.word and d2.num=d1.num
	where d2.id > d1.id

/*
id|
--|
 5|
 6|
 8|
 9|
10|
*/

/** Use the previous query in a subquery to exclude the dupliates with higher id values */
select 	*
from 	dupes
where 	id not in (
	select  d2.id
	from 	dupes d1
		inner join dupes d2 on d2.word=d1.word and d2.num=d1.num
	where d2.id > d1.id
)
order by word, num;

/*
word|num|id|
----|---|--|
aaa |100| 1|
bbb |200| 2|
bbb |400| 4|
ccc |300| 3|
ddd |400| 7|
*/

/** Use a Common Table Expression with the ROW_NUMBER() Window Function and select only the rows with row_num=1 */
with cte as (
	select 	*
	       ,row_number() over(partition by word, num order by id) as row_num
	from 	dupes
)
select 	word, num, id
from 	cte
where 	row_num = 1
order by word, num;

/*
word|num|id|
----|---|--|
aaa |100| 1|
bbb |200| 2|
bbb |400| 4|
ccc |300| 3|
ddd |400| 7|
*/
	/** Create test table */
	drop table if exists dupes;
	create temporary table dupes(word text, num int, id int);

	/** Add test data with duplicates */
	insert into dupes(word, num, id)
	values ('aaa', 100, 1)
	,('bbb', 200, 2)
	,('ccc', 300, 3)
	,('bbb', 400, 4)
	,('bbb', 200, 5) -- duplicate
	,('ccc', 300, 6) -- duplicate
	,('ddd', 400, 7)
	,('bbb', 400, 8) -- duplicate
	,('aaa', 100, 9) -- duplicate
	,('ccc', 300, 10); -- duplicate

	/** Show all the data */
	select *
	from dupes
	order by word, num, id;

	/*
	word\|num\|id\|
	----\|---\|--\|
	aaa \|100\| 1\|
	aaa \|100\| 9\|
	bbb \|200\| 2\|
	bbb \|200\| 5\|
	bbb \|400\| 4\|
	bbb \|400\| 8\|
	ccc \|300\| 3\|
	ccc \|300\| 6\|
	ccc \|300\|10\|
	ddd \|400\| 7\|
	*/

	/** Use DISTINCT to show only the unique word, num records */
	select distinct word, num
	from dupes
	order by word, num;

	/*
	word\|num\|
	----\|---\|
	aaa \|100\|
	bbb \|200\|
	bbb \|400\|
	ccc \|300\|
	ddd \|400\|
	*/

	/** Use GROUP BY to select unique records with min, max, and count aggregates */
	select word, num, min(id), max(id), count(*)
	from dupes
	group by word, num
	order by word, num;

	/*
	word\|num\|min\|max\|count\|
	----\|---\|---\|---\|-----\|
	aaa \|100\| 1\| 9\| 2\|
	bbb \|200\| 2\| 5\| 2\|
	bbb \|400\| 4\| 8\| 2\|
	ccc \|300\| 3\| 10\| 3\|
	ddd \|400\| 7\| 7\| 1\|
	*/

	/** Find the higher id values of duplicates, distinct only added for clarity */
	select distinct d2.id
	from dupes d1
	inner join dupes d2 on d2.word=d1.word and d2.num=d1.num
	where d2.id > d1.id

	/*
	id\|
	--\|
	5\|
	6\|
	8\|
	9\|
	10\|
	*/

	/** Use the previous query in a subquery to exclude the dupliates with higher id values */
	select *
	from dupes
	where id not in (
	select d2.id
	from dupes d1
	inner join dupes d2 on d2.word=d1.word and d2.num=d1.num
	where d2.id > d1.id
	)
	order by word, num;

	/*
	word\|num\|id\|
	----\|---\|--\|
	aaa \|100\| 1\|
	bbb \|200\| 2\|
	bbb \|400\| 4\|
	ccc \|300\| 3\|
	ddd \|400\| 7\|
	*/

	/** Use a Common Table Expression with the ROW_NUMBER() Window Function and select only the rows with row_num=1 */
	with cte as (
	select *
	,row_number() over(partition by word, num order by id) as row_num
	from dupes
	)
	select word, num, id
	from cte
	where row_num = 1
	order by word, num;

	/*
	word\|num\|id\|
	----\|---\|--\|
	aaa \|100\| 1\|
	bbb \|200\| 2\|
	bbb \|400\| 4\|
	ccc \|300\| 3\|
	ddd \|400\| 7\|
	*/