Skip to content

Instantly share code, notes, and snippets.

@cjthompson
Created March 20, 2013 17:43
Show Gist options
  • Save cjthompson/5206772 to your computer and use it in GitHub Desktop.
Save cjthompson/5206772 to your computer and use it in GitHub Desktop.
Find duplicate records in a MySQL database, save the first row of a set of duplicates, and delete the rest.
-- Assume `Table` has 4 columns: id, col1, col2, col3
CREATE TABLE `dups` (
`id` int(10) unsigned DEFAULT NULL,
`hash` varbinary(32) DEFAULT NULL,
KEY `id` (`id`),
KEY `hash` (`hash`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO dups SELECT MIN(id) AS id, MD5(CONCAT(col1, col2, col3)) AS hash FROM `Table` GROUP BY col1, col2, col3 HAVING COUNT(*) > 1;
CREATE TABLE `temp` (
`id` int(10) unsigned NOT NULL DEFAULT '0',
`hash` varbinary(32) DEFAULT NULL,
KEY `id` (`id`),
KEY `hash` (`hash`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO temp SELECT id, MD5(CONCAT(col1, col2, col3)) as hash FROM `Table`;
DELETE FROM `Table` WHERE id IN (SELECT temp.id FROM temp LEFT JOIN dups USING (hash) WHERE temp.id <> dups.id);
ALTER TABLE `Table` ADD UNIQUE INDEX `unique_index` (col1, col2, col3);
DROP TABLE dups;
DROP TABLE temp;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment