Last active
December 27, 2015 15:09
-
-
Save erincerys/7345085 to your computer and use it in GitHub Desktop.
If you require adding a UNIQUE key to a MySQL table, but the desired column contains duplicates, this stored procedure will assist in raising cardinality by appending random numbers to the end of a duplicate record.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DELIMITER $$ | |
CREATE PROCEDURE `sp_utility_deduplicate_column` (IN table_in VARCHAR(64), IN unique_in VARCHAR(64), IN nonunique_in VARCHAR(64), length_in INT(11)) | |
BEGIN | |
/* sp_utility_deduplicate_column | |
IN table_in The name of the table to execute deduplication on | |
IN unique_in The PRIMARY KEY for table_in (auto-incremented integer recommended) | |
IN nonunique_in The column that contains duplicated values (INDEX recommended) | |
IN length_in Length (number of digits) to add to the end of the column value | |
WARNING: If the value of a column that has a duplicated record is near or at the maximum length | |
its data type, the initial attempt to add significant randomness may fail, triggering a | |
second pass. This second pass will remove a number of characters from the end of the | |
value equal to length_in, which could alter the core value! | |
*/ | |
DECLARE duplicates INT(11); | |
DECLARE iteration TINYINT; | |
SET iteration = 1; | |
deduplicate: WHILE 1 DO | |
SELECT COUNT(*) INTO duplicates FROM table AS a JOIN table AS b ON a.column = b.column WHERE a.id <> b.id; | |
IF duplicates > 0 THEN | |
SET @query = CONCAT(" | |
UPDATE ",table_in," AS a | |
JOIN ",table_in," AS b ON a.",column_in," = b.",column_in," | |
SET a.",column_in," = | |
CONCAT( | |
IF(iteration > 1, | |
SUBSTRING(a.",column_in,", 1, CHAR_LENGTH(a.",column_in,") - (",length_in," + 1)), | |
a.",column_in," | |
), | |
' ',FLOOR(1 + (RAND() * 50)) | |
) | |
WHERE a.",unique_in," <> b.",unique_in | |
"); | |
PREPARE STMT FROM @query; | |
EXECUTE STMT; | |
DEALLOCATE PREPARE STMT; | |
SET iteration = iteration + 1; | |
ELSE | |
LEAVE deduplicate; | |
END IF; | |
END WHILE; | |
END$$ | |
DELIMITER ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment