Skip to content

Instantly share code, notes, and snippets.

@ericmason
Last active August 29, 2015 14:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ericmason/b82b8919d9411249558c to your computer and use it in GitHub Desktop.
Save ericmason/b82b8919d9411249558c to your computer and use it in GitHub Desktop.
Change MySQL columns to UTF-8 without changing their values
#!/usr/bin/env ruby
##
# Change the type of MySQL columns that contain UTF-8 data to UTF-8 without changing their values
#
# Converts the columns to binary, then converts them back to char/text with the utf8 charset
# This is useful when the database has some other default value like latin1 but you've been
# storing values as UTF-8
##
require 'rubygems'
require 'mysql2'
require 'yaml'
require 'trollop'
def mysql
@mysql ||= Mysql2::Client.new(:host => "localhost", :username => "root")
end
def execute_sql(sql)
puts sql
unless @dry_run
begin
mysql.query sql
rescue
STDERR.puts "Tried SQL: #{sql}\nERROR: #{$!}"
raise unless @ignore_errors
end
end
end
def binary_columns
mysql.query %{
SELECT table_schema, table_name, column_name, column_type, CHARACTER_SET_NAME, COLLATION_NAME
FROM information_schema.columns
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema')
and data_type LIKE '%binary%' #{@schema_where}
}
end
def list_full_text_indexes
mysql.query(%{
SELECT table_schema as `schema`,
table_name AS `table`,
index_name AS `index`,
GROUP_CONCAT(concat('`', column_name, '`') ORDER BY seq_in_index) AS `columns`
FROM information_schema.statistics
WHERE index_type = 'FULLTEXT' #{@schema_where}
GROUP BY 1,2, 3
})
end
# You can't have a full-text index on a binary value, so we have to drop the full-text indexes first
def drop_full_text_indexes(full_text_indexes)
# Save the original list of full text indexes in case something goes wrong
File.open("full_text_indexes-#{Time.now.to_i}.yml", "w") { |o| o.write(YAML.dump(full_text_indexes.map {|r| r})) }
# Drop the full text indexes
full_text_indexes.each do |index|
execute_sql "DROP INDEX `#{index['index']}` on `#{index['schema']}`.`#{index['table']}`"
end
end
# Find all the char columns
def char_columns
mysql.query(%{
SELECT table_schema, table_name, column_name, column_type, CHARACTER_SET_NAME, COLLATION_NAME
FROM information_schema.columns
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema')
AND (
CHARACTER_SET_NAME != '#{@change_charset_to}'
OR COLLATION_NAME != '#{@change_collation_to}'
)
and data_type LIKE '%char%' #{@schema_where}
})
end
def convert_char_columns
char_columns.each do |column|
# Convert char columns to binary
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type'].sub('char', 'binary')}"
# Convert binary columns back to char, adding UTF-8
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type']} CHARACTER SET #{@change_charset_to} COLLATE #{@change_collation_to}"
end
end
def text_columns
# Find all the text columns
text_columns = mysql.query(%{
SELECT table_schema, table_name, column_name, column_type, CHARACTER_SET_NAME, COLLATION_NAME
FROM information_schema.columns
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema')
AND (
CHARACTER_SET_NAME != '#{@change_charset_to}'
OR COLLATION_NAME != '#{@change_collation_to}'
)
and data_type LIKE '%text%' #{@schema_where}
})
end
def convert_text_columns
text_columns.each do |column|
# Convert text columns to BLOB
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type'].sub('text', 'blob')}"
# Convert BLOB columns back to text, adding UTF-8
execute_sql "ALTER TABLE `#{column['table_schema']}`.`#{column['table_name']}` MODIFY `#{column['column_name']}` #{column['column_type']} CHARACTER SET #{@change_charset_to} COLLATE #{@change_collation_to}"
end
end
def set_default_table_encoding_on_tables
# Set the default encoding to utf8 on the tables themselves
tables = mysql.query(%{
SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_COLLATION
FROM information_schema.tables
WHERE table_schema not in ('information_schema', 'mysql', 'performance_schema')
AND TABLE_COLLATION != '#{@change_collation_to}'
})
tables.each do |table|
execute_sql "ALTER TABLE `#{table['TABLE_SCHEMA']}`.`#{table['TABLE_NAME']}` default character set = #{@change_charset_to} collate = #{@change_collation_to}"
end
end
def recreate_full_text_indexes(full_text_indexes)
# Re-create the full-text indexes
full_text_indexes.each do |index|
execute_sql "CREATE FULLTEXT INDEX `#{index['index']}` ON `#{index['schema']}`.`#{index['table']}` (#{index['columns']})";
end
end
opts = Trollop::options do
opt :schema_name, "Only convert schema schema_name", type: String
opt :full_text_indexes_file, "Just re-create the full-text indexes from full_text_indexes_file", type: String
opt :change_charset_to, "Change charset to change_charset_to", type: String
opt :change_collation_to, "Change collation to change_collation_to", type: String
opt :dry_run, "Output the SQL statements but don't execute them"
opt :ignore_errors, "Ignore errors in SQL statements and keep going"
opt :binary_columns, "Just list the binary columns (to look for possible errors)"
end
@schema_name = opts[:schema_name]
@schema_where = @schema_name ? "and table_schema = '#{@schema_name}'" : ""
@dry_run = opts[:dry_run]
@ignore_errors = opts[:ignore_errors]
@change_charset_to = opts[:change_charset_to] || "utf8mb4"
@change_collation_to = opts[:change_collation_to] || "utf8mb4_unicode_ci"
if opts[:binary_columns]
format = "%-20s %-20s %-20s %-15s"
puts format % ["Schema", "Table", "Column", "Column Type"]
binary_columns.each do |row|
puts format % [row['table_schema'], row['table_name'], row['column_name'], row['column_type']]
end
end
if opts[:full_text_indexes_file]
recreate_full_text_indexes(YAML.load(File.read opts[:full_text_indexes_file]))
exit
end
begin
full_text_indexes = list_full_text_indexes
drop_full_text_indexes(full_text_indexes)
convert_char_columns
convert_text_columns
ensure
recreate_full_text_indexes(full_text_indexes)
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment