Skip to content

Instantly share code, notes, and snippets.

@aadrian
Created March 1, 2020 09:21
Show Gist options
  • Save aadrian/e76bebe44ed1b918c31bb3bd5cde5dfb to your computer and use it in GitHub Desktop.
Save aadrian/e76bebe44ed1b918c31bb3bd5cde5dfb to your computer and use it in GitHub Desktop.
MySQL find duplicate rows for deleteion based on one column!
-- ddl and data ('name' will be duplicated)
CREATE TABLE dupli (id INT(10) NOT NULL, name VARCHAR(100) NOT NULL, PRIMARY KEY (id));
INSERT INTO dupli (id, name) VALUES (1, 'aa');
INSERT INTO dupli (id, name) VALUES (2, 'aa');
INSERT INTO dupli (id, name) VALUES (3, 'bb');
INSERT INTO dupli (id, name) VALUES (4, 'cc');
INSERT INTO dupli (id, name) VALUES (5, 'cc');
INSERT INTO dupli (id, name) VALUES (6, 'aa');
-- count with partition the duplicates ( where 'cnt' > 1 will need cleanup)
select b.id, b.name, count(b.name) over(partition by b.name) as cnt from dupli b
-- use row_number() inside group, since
select b.id, b.name,
row_number() over(partition by b.name) as rnr,
count(b.name) over(partition by b.name) as cnt
from dupli b
-- select only those that have more than one row (except the first one if there are more)
select * from (
select b.id, b.name,
row_number() over(partition by b.name) as rnr,
count(b.name) over(partition by b.name) as cnt
from dupli b
) c
where c.rnr != 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment