Skip to content

Instantly share code, notes, and snippets.

@ludo
Created November 19, 2014 20:42
Show Gist options
  • Save ludo/3676e121fb7e4634b57d to your computer and use it in GitHub Desktop.
Save ludo/3676e121fb7e4634b57d to your computer and use it in GitHub Desktop.
Source: @kadkins @ https://github.com/brianmario/mysql2/issues/367. Usage: Copy and paste into utf8mb4helper.rb and alter the settings in top of the script and then run ruby utf8mb4helper.rb > utf8mb4helper.sql
# ruby script to generate ddl statements to convert utf8 to utf8mb4
# you will need ruby, rubygems and the mysql2 gem to run this script
# => gem install mysql2 --no-rdoc --no-ri
require 'rubygems'
require 'mysql2'
### settings
dbhost='localhost'
dbuser='root'
dbuserpwd='password'
database='testdbname'
charset='utf8mb4'
collation='utf8mb4_unicode_ci'
### these will store the statements
no_index_and_data_less_than_191=""
alter_tables=""
alter_column_sizes=""
needs_index_resized=""
no_index_but_data_greater_than_191=""
has_index_but_less_than_191 = ""
### connect to database
con = Mysql2::Client.new(:host=>dbhost,:username=>dbuser,:password=>dbuserpwd)
# For each table in database
tables = con.query("SELECT table_name from information_schema.tables where table_schema = '" + database + "';")
tables.each do |h|
# check and update columns
columns = con.query("SHOW FULL COLUMNS FROM `" + database + "`.`" + h["table_name"] + "`;")
columns.each do |c|
# if the column has an index, set the max field size to 191
checkindex=con.query("SELECT * FROM information_schema.statistics WHERE table_schema = '#{database}' AND table_name = '#{h["table_name"]}' AND column_name = '#{c["Field"]}';")
if c["Type"] =~ /^varchar/
# varchar.. check the size
maxsize = con.query("SELECT MAX(LENGTH(`#{c["Field"]}`)) FROM `#{database}`.`#{h["table_name"]}`;")
size = maxsize.first.each_value.collect.first || 0
if checkindex.size > 0
# varchar with an index
if size > 191
needs_index_resized << "/* ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` varchar(191); */\n"
else
has_index_but_less_than_191 << "ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` varchar(191);\n"
end
else # no index
utf8 = con.query("SELECT character_set_name FROM information_schema.`COLUMNS` C WHERE table_schema = '#{database}' AND table_name = '#{h["table_name"]}' AND column_name = '#{c["Field"]}' AND character_set_name='utf8';")
if utf8.size > 0 # utf8 varchar
if size > 191
no_index_but_data_greater_than_191 << "ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` #{c["Type"]} CHARACTER SET '#{charset}' COLLATE '#{collation}';\n"
else
no_index_and_data_less_than_191 << "ALTER TABLE `#{h["table_name"]}` CHANGE `#{c["Field"]}` `#{c["Field"]}` #{c["Type"]} CHARACTER SET '#{charset}' COLLATE '#{collation}';\n"
end
end
end
end
end
# check and update table
alter_tables << "ALTER TABLE `#{h["table_name"]}` CONVERT TO CHARACTER SET '#{charset}' COLLATE '#{collation}';\n"
end
puts "use `#{database}`;"
puts "/* ############ THESE COLUMNS HAVE INDEXES BUT NO DATA > THAN 191-- MAY NEED TO REBUILD INDEX ############ */"
puts has_index_but_less_than_191
puts "\n"
puts "/* ############ THESE COLUMNS HAVE INDEXES AND DATA > 191.. ALTERING THEM WOULD TRUNCATE DATA!!! RESIZE INDEX */"
puts needs_index_resized
puts "\n"
puts "/* ############ THESE COLUMNS DO NOT HAVE AN INDEX AND BUT HAVE DATA > 191 -- BE AWARE OF FUTURE INDICES ############ */"
puts no_index_but_data_greater_than_191
puts "\n"
puts "/* ############ THESE COLUMNS DO NOT HAVE AN INDEX AND CURRENTLY DO NOT HAVE DATA > 191 ############ */"
puts no_index_and_data_less_than_191
puts "\n"
puts "/* ############ CONVERT TABLES TO UTF8MB4 ############ */"
puts alter_tables
puts "\n"
puts "/* ############ CONVERT DATABASE ########### */"
puts "ALTER DATABASE `#{database}` CHARACTER SET = utf8mb4 COLLATE = `#{collation}`;"
puts "\n"
puts "/* ############ SOME HELPFULL COMMANDS TO RUN BEFORE AND/OR AFTER ########### */"
puts "# use `#{database}`; show session variables like 'character%';"
puts "# set names utf8mb4; use `#{database}`; show session variables like 'character%';"
puts "# select `SOMECOLUMN` from `#{database}` where NOT HEX(`SOMECOLUMN`) REGEXP '^([0-7][0-9A-F])*$'"
con.close
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment