Last active
August 29, 2015 14:11
-
-
Save TheFrankman/f5f2a4c3d92530857ceb to your computer and use it in GitHub Desktop.
Mysql Query - Get Duplicate Email Address - Keep the original emails
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Back up emails into new column | |
ALTER TABLE `customer_entity` | |
ADD COLUMN `old_email` VARCHAR(255) NULL; | |
UPDATE customer_entity SET old_email = email; | |
# Create two tmp tables | |
CREATE TEMPORARY TABLE tmp_duplicates ( | |
entity_id INT NOT NULL | |
, created_at VARCHAR(255) | |
, email VARCHAR(255) | |
) ENGINE=MEMORY; | |
CREATE TEMPORARY TABLE tmp_max_entities ( | |
entity_id INT) ENGINE=MEMORY; | |
# Get all the duplicates into one place | |
INSERT INTO tmp_duplicates (entity_id, created_at, email) | |
SELECT e.entity_id, e.created_at, e.email | |
FROM `customer_entity` e | |
INNER JOIN ( | |
SELECT `old_email` as `email`, COUNT(`entity_id`) as `num` | |
FROM `customer_entity` | |
GROUP BY `old_email` | |
HAVING COUNT(`entity_id`) > 1 | |
) a ON e.email = a.email | |
ORDER BY e.email, e.created_at; | |
# Pick the latest entries and store the ids | |
INSERT INTO tmp_max_entities | |
SELECT MAX(entity_id) FROM tmp_duplicates GROUP BY email; | |
# Disgard the latest entries from the duplicate table | |
DELETE FROM tmp_duplicates | |
WHERE entity_id IN (SELECT entity_id FROM tmp_max_entities); | |
# Update all email addresses in the customer table which still exist in the duplicate table | |
UPDATE customer_entity | |
SET email = CONCAT(email, '_dupe') | |
WHERE entity_id in (SELECT entity_id FROM tmp_duplicates); | |
# Clean up | |
DROP TABLE tmp_duplicates; | |
DROP TABLE tmp_max_entities; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment