Created
March 1, 2020 09:21
-
-
Save aadrian/e76bebe44ed1b918c31bb3bd5cde5dfb to your computer and use it in GitHub Desktop.
MySQL find duplicate rows for deleteion based on one column!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- ddl and data ('name' will be duplicated) | |
CREATE TABLE dupli (id INT(10) NOT NULL, name VARCHAR(100) NOT NULL, PRIMARY KEY (id)); | |
INSERT INTO dupli (id, name) VALUES (1, 'aa'); | |
INSERT INTO dupli (id, name) VALUES (2, 'aa'); | |
INSERT INTO dupli (id, name) VALUES (3, 'bb'); | |
INSERT INTO dupli (id, name) VALUES (4, 'cc'); | |
INSERT INTO dupli (id, name) VALUES (5, 'cc'); | |
INSERT INTO dupli (id, name) VALUES (6, 'aa'); | |
-- count with partition the duplicates ( where 'cnt' > 1 will need cleanup) | |
select b.id, b.name, count(b.name) over(partition by b.name) as cnt from dupli b | |
-- use row_number() inside group, since | |
select b.id, b.name, | |
row_number() over(partition by b.name) as rnr, | |
count(b.name) over(partition by b.name) as cnt | |
from dupli b | |
-- select only those that have more than one row (except the first one if there are more) | |
select * from ( | |
select b.id, b.name, | |
row_number() over(partition by b.name) as rnr, | |
count(b.name) over(partition by b.name) as cnt | |
from dupli b | |
) c | |
where c.rnr != 1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment