Skip to content

Instantly share code, notes, and snippets.

@TheFrankman
Last active August 29, 2015 14:11
Show Gist options
  • Save TheFrankman/f5f2a4c3d92530857ceb to your computer and use it in GitHub Desktop.
Save TheFrankman/f5f2a4c3d92530857ceb to your computer and use it in GitHub Desktop.
Mysql Query - Get Duplicate Email Address - Keep the original emails
# Back up emails into new column
ALTER TABLE `customer_entity`
ADD COLUMN `old_email` VARCHAR(255) NULL;
UPDATE customer_entity SET old_email = email;
# Create two tmp tables
CREATE TEMPORARY TABLE tmp_duplicates (
entity_id INT NOT NULL
, created_at VARCHAR(255)
, email VARCHAR(255)
) ENGINE=MEMORY;
CREATE TEMPORARY TABLE tmp_max_entities (
entity_id INT) ENGINE=MEMORY;
# Get all the duplicates into one place
INSERT INTO tmp_duplicates (entity_id, created_at, email)
SELECT e.entity_id, e.created_at, e.email
FROM `customer_entity` e
INNER JOIN (
SELECT `old_email` as `email`, COUNT(`entity_id`) as `num`
FROM `customer_entity`
GROUP BY `old_email`
HAVING COUNT(`entity_id`) > 1
) a ON e.email = a.email
ORDER BY e.email, e.created_at;
# Pick the latest entries and store the ids
INSERT INTO tmp_max_entities
SELECT MAX(entity_id) FROM tmp_duplicates GROUP BY email;
# Disgard the latest entries from the duplicate table
DELETE FROM tmp_duplicates
WHERE entity_id IN (SELECT entity_id FROM tmp_max_entities);
# Update all email addresses in the customer table which still exist in the duplicate table
UPDATE customer_entity
SET email = CONCAT(email, '_dupe')
WHERE entity_id in (SELECT entity_id FROM tmp_duplicates);
# Clean up
DROP TABLE tmp_duplicates;
DROP TABLE tmp_max_entities;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment