Created
March 20, 2013 17:43
-
-
Save cjthompson/5206772 to your computer and use it in GitHub Desktop.
Find duplicate records in a MySQL database, save the first row of a set of duplicates, and delete the rest.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Assume `Table` has 4 columns: id, col1, col2, col3 | |
CREATE TABLE `dups` ( | |
`id` int(10) unsigned DEFAULT NULL, | |
`hash` varbinary(32) DEFAULT NULL, | |
KEY `id` (`id`), | |
KEY `hash` (`hash`) | |
) ENGINE=InnoDB DEFAULT CHARSET=latin1; | |
INSERT INTO dups SELECT MIN(id) AS id, MD5(CONCAT(col1, col2, col3)) AS hash FROM `Table` GROUP BY col1, col2, col3 HAVING COUNT(*) > 1; | |
CREATE TABLE `temp` ( | |
`id` int(10) unsigned NOT NULL DEFAULT '0', | |
`hash` varbinary(32) DEFAULT NULL, | |
KEY `id` (`id`), | |
KEY `hash` (`hash`) | |
) ENGINE=InnoDB DEFAULT CHARSET=latin1; | |
INSERT INTO temp SELECT id, MD5(CONCAT(col1, col2, col3)) as hash FROM `Table`; | |
DELETE FROM `Table` WHERE id IN (SELECT temp.id FROM temp LEFT JOIN dups USING (hash) WHERE temp.id <> dups.id); | |
ALTER TABLE `Table` ADD UNIQUE INDEX `unique_index` (col1, col2, col3); | |
DROP TABLE dups; | |
DROP TABLE temp; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment