Created
October 7, 2016 21:19
-
-
Save simonds/c5dd2ece238fbff9024f0af4c3a63427 to your computer and use it in GitHub Desktop.
Finds records in a MySQL database that have non-ascii values and fixes double-encoding errors. Accepts table and field name.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DELIMITER ;; | |
DROP PROCEDURE IF EXISTS character_fix;; | |
CREATE DEFINER=`root`@`localhost` PROCEDURE `character_fix`(_table_name VARCHAR(50), _field_name VARCHAR(50)) | |
BEGIN | |
DROP TEMPORARY TABLE IF EXISTS temp_cursor_table; | |
SET @temp_table_query='CREATE TEMPORARY TABLE temp_cursor_table AS '; | |
SET @temp_table_query=concat( @temp_table_query, 'SELECT id, ', _field_name, ' FROM ', _table_name, ' WHERE ', _field_name, ' <> convert(', _field_name, ' using ascii) ORDER BY id'); | |
PREPARE temp_statement from @temp_table_query; | |
EXECUTE temp_statement; | |
DEALLOCATE PREPARE temp_statement; | |
BEGIN | |
DECLARE _done TINYINT DEFAULT FALSE; | |
DECLARE _convert_error TINYINT DEFAULT FALSE; | |
DECLARE _cursor_id INT(11); | |
DECLARE _cursor_field VARCHAR(50); | |
DECLARE _str_converted TEXT CHARSET utf8; | |
DECLARE _field_length INT DEFAULT 0; | |
DECLARE _field_position INT DEFAULT 0; | |
DECLARE _next_two_characters VARCHAR(2) DEFAULT NULL; | |
DECLARE _next_character VARCHAR(1) DEFAULT NULL; | |
DECLARE _cursor_values CURSOR FOR SELECT * FROM temp_cursor_table; | |
DECLARE CONTINUE HANDLER FOR NOT FOUND SET _done = TRUE; | |
DECLARE CONTINUE HANDLER FOR 1300 SET _convert_error = TRUE; | |
OPEN _cursor_values; | |
_read_loop: LOOP | |
FETCH _cursor_values INTO _cursor_id, _cursor_field; | |
IF _done THEN | |
LEAVE _read_loop; | |
END IF; | |
SET _field_length = LENGTH(_cursor_field); | |
SET _field_position = 1; | |
SET _str_converted = ''; | |
_iterator: | |
LOOP | |
IF _field_length = 0 OR _field_position >= _field_length OR _cursor_field IS NULL THEN | |
LEAVE _iterator; | |
END IF; | |
SET _next_two_characters = SUBSTRING(_cursor_field FROM _field_position FOR 2); | |
SET _next_character = SUBSTRING(_cursor_field FROM _field_position FOR 1); | |
SET _next_two_characters = CONVERT(BINARY CONVERT(_next_two_characters USING latin1) USING utf8); | |
IF NOT _convert_error THEN | |
IF LENGTH(_next_two_characters) = 2 THEN | |
SET _str_converted = CONCAT(_str_converted, _next_two_characters); | |
SET _field_position = _field_position + 2; | |
ELSE | |
SET _str_converted = CONCAT(_str_converted, _next_two_characters); | |
SET _field_position = _field_position + 1; | |
END IF; | |
ELSE | |
SET _str_converted = CONCAT(_str_converted, _next_character); | |
SET _field_position = _field_position + 1; | |
END IF; | |
SET _convert_error = FALSE; | |
END LOOP; | |
SET @update_sql = CONCAT("UPDATE ", _table_name, " SET ", _field_name, " = ", QUOTE(_str_converted), " where id = ", _cursor_id, ";"); | |
PREPARE _update_statement FROM @update_sql; | |
EXECUTE _update_statement; | |
DEALLOCATE PREPARE _update_statement; | |
END LOOP; | |
CLOSE _cursor_values; | |
END; | |
DROP TEMPORARY TABLE IF EXISTS temp_cursor_table; | |
END;; | |
DELIMITER ; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment