Last active
August 29, 2015 14:17
-
-
Save ericmason/b82b8919d9411249558c to your computer and use it in GitHub Desktop.
Change MySQL columns to UTF-8 without changing their values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
## | |
# Change the type of MySQL columns that contain UTF-8 data to UTF-8 without changing their values | |
# | |
# Converts the columns to binary, then converts them back to char/text with the utf8 charset | |
# This is useful when the database has some other default value like latin1 but you've been | |
# storing values as UTF-8 | |
## | |
require 'rubygems' | |
require 'mysql2' | |
require 'yaml' | |
require 'trollop' | |
def mysql | |
@mysql ||= Mysql2::Client.new(:host => "localhost", :username => "root") | |
end | |
def execute_sql(sql) | |
puts sql | |
unless @dry_run | |
begin | |
mysql.query sql | |
rescue | |
STDERR.puts "Tried SQL: #{sql}\nERROR: #{$!}" | |
raise unless @ignore_errors | |
end | |
end | |
end | |
def binary_columns | |
mysql.query %{ | |
SELECT table_schema, table_name, column_name, column_type, CHARACTER_SET_NAME, COLLATION_NAME | |
FROM information_schema.columns | |
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema') | |
and data_type LIKE '%binary%' #{@schema_where} | |
} | |
end | |
def list_full_text_indexes | |
mysql.query(%{ | |
SELECT table_schema as `schema`, | |
table_name AS `table`, | |
index_name AS `index`, | |
GROUP_CONCAT(concat('`', column_name, '`') ORDER BY seq_in_index) AS `columns` | |
FROM information_schema.statistics | |
WHERE index_type = 'FULLTEXT' #{@schema_where} | |
GROUP BY 1,2, 3 | |
}) | |
end | |
# You can't have a full-text index on a binary value, so we have to drop the full-text indexes first | |
def drop_full_text_indexes(full_text_indexes) | |
# Save the original list of full text indexes in case something goes wrong | |
File.open("full_text_indexes-#{Time.now.to_i}.yml", "w") { |o| o.write(YAML.dump(full_text_indexes.map {|r| r})) } | |
# Drop the full text indexes | |
full_text_indexes.each do |index| | |
execute_sql "DROP INDEX `#{index['index']}` on `#{index['schema']}`.`#{index['table']}`" | |
end | |
end | |
# Find all the char columns | |
def char_columns | |
mysql.query(%{ | |
SELECT table_schema, table_name, column_name, column_type, CHARACTER_SET_NAME, COLLATION_NAME | |
FROM information_schema.columns | |
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema') | |
AND ( | |
CHARACTER_SET_NAME != '#{@change_charset_to}' | |
OR COLLATION_NAME != '#{@change_collation_to}' | |
) | |
and data_type LIKE '%char%' #{@schema_where} | |
}) | |
end | |
def convert_char_columns | |
char_columns.each do |column| | |
# Convert char columns to binary | |
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type'].sub('char', 'binary')}" | |
# Convert binary columns back to char, adding UTF-8 | |
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type']} CHARACTER SET #{@change_charset_to} COLLATE #{@change_collation_to}" | |
end | |
end | |
def text_columns | |
# Find all the text columns | |
text_columns = mysql.query(%{ | |
SELECT table_schema, table_name, column_name, column_type, CHARACTER_SET_NAME, COLLATION_NAME | |
FROM information_schema.columns | |
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema') | |
AND ( | |
CHARACTER_SET_NAME != '#{@change_charset_to}' | |
OR COLLATION_NAME != '#{@change_collation_to}' | |
) | |
and data_type LIKE '%text%' #{@schema_where} | |
}) | |
end | |
def convert_text_columns | |
text_columns.each do |column| | |
# Convert text columns to BLOB | |
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type'].sub('text', 'blob')}" | |
# Convert BLOB columns back to text, adding UTF-8 | |
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type']} CHARACTER SET #{@change_charset_to} COLLATE #{@change_collation_to}" | |
end | |
end | |
def set_default_table_encoding_on_tables | |
# Set the default encoding to utf8 on the tables themselves | |
tables = mysql.query(%{ | |
SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_COLLATION | |
FROM information_schema.tables | |
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema') | |
AND TABLE_COLLATION != '#{@change_collation_to}' | |
}) | |
tables.each do |table| | |
execute_sql "ALTER TABLE `#{table['TABLE_SCHEMA']}`.`#{table['TABLE_NAME']}` default character set = #{@change_charset_to} collate = #{@change_collation_to}" | |
end | |
end | |
def recreate_full_text_indexes(full_text_indexes) | |
# Re-create the full-text indexes | |
full_text_indexes.each do |index| | |
execute_sql "CREATE FULLTEXT INDEX `#{index['index']}` ON `#{index['schema']}`.`#{index['table']}` (#{index['columns']})"; | |
end | |
end | |
opts = Trollop::options do | |
opt :schema_name, "Only convert schema schema_name", type: String | |
opt :full_text_indexes_file, "Just re-create the full-text indexes from full_text_indexes_file", type: String | |
opt :change_charset_to, "Change charset to change_charset_to", type: String | |
opt :change_collation_to, "Change collation to change_collation_to", type: String | |
opt :dry_run, "Output the SQL statements but don't execute them" | |
opt :ignore_errors, "Ignore errors in SQL statements and keep going" | |
opt :binary_columns, "Just list the binary columns (to look for possible errors)" | |
end | |
@schema_name = opts[:schema_name] | |
@schema_where = @schema_name ? "and table_schema = '#{@schema_name}'" : "" | |
@dry_run = opts[:dry_run] | |
@ignore_errors = opts[:ignore_errors] | |
@change_charset_to = opts[:change_charset_to] || "utf8mb4" | |
@change_collation_to = opts[:change_collation_to] || "utf8mb4_unicode_ci" | |
if opts[:binary_columns] | |
format = "%-20s %-20s %-20s %-15s" | |
puts format % ["Schema", "Table", "Column", "Column Type"] | |
binary_columns.each do |row| | |
puts format % [row['table_schema'], row['table_name'], row['column_name'], row['column_type']] | |
end | |
end | |
if opts[:full_text_indexes_file] | |
recreate_full_text_indexes(YAML.load(File.read opts[:full_text_indexes_file])) | |
exit | |
end | |
begin | |
full_text_indexes = list_full_text_indexes | |
drop_full_text_indexes(full_text_indexes) | |
convert_char_columns | |
convert_text_columns | |
ensure | |
recreate_full_text_indexes(full_text_indexes) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment