Created
March 30, 2012 13:59
-
-
Save omerida/2251728 to your computer and use it in GitHub Desktop.
Mysql find and update duplicate fields before creating a unique index
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# create temp table | |
CREATE TEMPORARY TABLE link_dupes ( | |
`id` int(11) unsigned NOT NULL, | |
`user_id` int(11) unsigned NOT NULL, | |
`link` varchar(1000) NOT NULL, | |
`link_unique` varchar(1000) NOT NULL | |
); | |
# find rows with duplicate link fields, into temp table | |
# add a hash to the url so that it still works | |
INSERT INTO `link_dupes` | |
SELECT id, user_id, link, CONCAT(link, '#', CONV(id, 10, 36)) FROM `user_links` uout WHERE EXISTS ( | |
SELECT 1 | |
FROM `user_links` uin | |
WHERE uout.link=uin.link AND uout.user_id=uin.user_id | |
LIMIT 1,100 | |
) | |
; | |
# update user_links table with deduped links | |
INSERT INTO `user_links` (id, user_id, link) | |
SELECT id, user_id, link_unique FROM `link_dupes` | |
ON DUPLICATE KEY UPDATE link=VALUES(link); | |
# now we can create a unique index | |
# hash the link using SHA1 so that the fingerprint for a link is a known length that we can index. | |
ALTER TABLE `user_links` ADD `link_hash` char(160) NOT NULL AFTER `link`; | |
UPDATE `user_links` SET `link_hash`=SHA1(`link`); | |
ALTER TABLE `user_links` ADD UNIQUE KEY `idx_user_hash_link` (`user_id`,`link_hash`); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment