Skip to content

Instantly share code, notes, and snippets.

@rastislavcore
Created December 13, 2016 09:12
Show Gist options
  • Save rastislavcore/bd658a6ea13708bc8285ff513d3159cb to your computer and use it in GitHub Desktop.
Save rastislavcore/bd658a6ea13708bc8285ff513d3159cb to your computer and use it in GitHub Desktop.
Correct garbled characters from Latin1 to UTF8
# Mysql: Convert encoding to UTF8 without garbled data
#1 Convert to blob then UTF8
ALTER TABLE comments MODIFY title BLOB;
ALTER TABLE comments MODIFY title VARCHAR(255) CHARACTER SET utf8;
#2 convert to BINARY then UTF8
-- test
SELECT CONVERT(CAST(CONVERT(title USING latin1) AS BINARY) USING utf8) FROM comments WHERE id = 123;
-- convert
UPDATE comments SET title = CONVERT(cast(CONVERT(title USING latin1) AS BINARY) USING utf8);
ALTER TABLE comments MODIFY title VARCHAR(255) CHARACTER SET utf8;
#3: iconv
mysqldump comments --add-drop-table users | replace CHARSET=latin1 CHARSET=utf8 | iconv -f latin1 -t utf8 | mysql some_database
#4: Safest approach without double conversion (PREFERRED METHOD)
UPDATE comments SET title = @txt WHERE char_length(title) = LENGTH(@txt := CONVERT(BINARY CONVERT(title USING latin1) USING utf8));
ALTER TABLE comments MODIFY title VARCHAR(255) CHARACTER SET utf8;
# Debugging Tip
SELECT HEX(CONVERT(`title` USING latin1)) FROM `comments` WHERE id=123;
SELECT HEX(CONVERT(CAST(CONVERT(title USING latin1) AS BINARY) USING utf8)) FROM comments WHERE id = 123;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment