Skip to content

Instantly share code, notes, and snippets.

@simonds
Created October 7, 2016 21:19
Show Gist options
  • Save simonds/c5dd2ece238fbff9024f0af4c3a63427 to your computer and use it in GitHub Desktop.
Save simonds/c5dd2ece238fbff9024f0af4c3a63427 to your computer and use it in GitHub Desktop.
Finds records in a MySQL database that have non-ascii values and fixes double-encoding errors. Accepts table and field name.
DELIMITER ;;
DROP PROCEDURE IF EXISTS character_fix;;
CREATE DEFINER=`root`@`localhost` PROCEDURE `character_fix`(_table_name VARCHAR(50), _field_name VARCHAR(50))
BEGIN
DROP TEMPORARY TABLE IF EXISTS temp_cursor_table;
SET @temp_table_query='CREATE TEMPORARY TABLE temp_cursor_table AS ';
SET @temp_table_query=concat( @temp_table_query, 'SELECT id, ', _field_name, ' FROM ', _table_name, ' WHERE ', _field_name, ' <> convert(', _field_name, ' using ascii) ORDER BY id');
PREPARE temp_statement from @temp_table_query;
EXECUTE temp_statement;
DEALLOCATE PREPARE temp_statement;
BEGIN
DECLARE _done TINYINT DEFAULT FALSE;
DECLARE _convert_error TINYINT DEFAULT FALSE;
DECLARE _cursor_id INT(11);
DECLARE _cursor_field VARCHAR(50);
DECLARE _str_converted TEXT CHARSET utf8;
DECLARE _field_length INT DEFAULT 0;
DECLARE _field_position INT DEFAULT 0;
DECLARE _next_two_characters VARCHAR(2) DEFAULT NULL;
DECLARE _next_character VARCHAR(1) DEFAULT NULL;
DECLARE _cursor_values CURSOR FOR SELECT * FROM temp_cursor_table;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET _done = TRUE;
DECLARE CONTINUE HANDLER FOR 1300 SET _convert_error = TRUE;
OPEN _cursor_values;
_read_loop: LOOP
FETCH _cursor_values INTO _cursor_id, _cursor_field;
IF _done THEN
LEAVE _read_loop;
END IF;
SET _field_length = LENGTH(_cursor_field);
SET _field_position = 1;
SET _str_converted = '';
_iterator:
LOOP
IF _field_length = 0 OR _field_position >= _field_length OR _cursor_field IS NULL THEN
LEAVE _iterator;
END IF;
SET _next_two_characters = SUBSTRING(_cursor_field FROM _field_position FOR 2);
SET _next_character = SUBSTRING(_cursor_field FROM _field_position FOR 1);
SET _next_two_characters = CONVERT(BINARY CONVERT(_next_two_characters USING latin1) USING utf8);
IF NOT _convert_error THEN
IF LENGTH(_next_two_characters) = 2 THEN
SET _str_converted = CONCAT(_str_converted, _next_two_characters);
SET _field_position = _field_position + 2;
ELSE
SET _str_converted = CONCAT(_str_converted, _next_two_characters);
SET _field_position = _field_position + 1;
END IF;
ELSE
SET _str_converted = CONCAT(_str_converted, _next_character);
SET _field_position = _field_position + 1;
END IF;
SET _convert_error = FALSE;
END LOOP;
SET @update_sql = CONCAT("UPDATE ", _table_name, " SET ", _field_name, " = ", QUOTE(_str_converted), " where id = ", _cursor_id, ";");
PREPARE _update_statement FROM @update_sql;
EXECUTE _update_statement;
DEALLOCATE PREPARE _update_statement;
END LOOP;
CLOSE _cursor_values;
END;
DROP TEMPORARY TABLE IF EXISTS temp_cursor_table;
END;;
DELIMITER ;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment