Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Ruby script to update MySQL from Latin1 to UTF8 without data conversion
desc "convert a latin1 database with utf8 data into proper utf8"
task :convert_to_utf8 => :environment do
puts Time.now
dryrun = ENV['DOIT'] != '1'
conn = ActiveRecord::Base.connection
if dryrun
def conn.run_sql(sql)
puts(sql)
end
else
def conn.run_sql(sql)
puts(sql)
execute(sql)
end
end
conn.run_sql "ALTER DATABASE my_database CHARACTER SET utf8 collate utf8_unicode_ci"
# Don't covert views
VIEWS = /(view|_v$)/
big = []
# These are table_name => model_class mappings that aren't rails standard or
# tables that we don't wish to convert (table_name => true).
mapping = { :pos => Pos,
:categories_products => true,
:delayed_jobs => Delayed::Job,
:schema_migrations => true
}.with_indifferent_access
tables = (conn.tables - big).select { |tbl| tbl !~ VIEWS }
puts "Converting #{tables.inspect}"
#(tables - big).each do |tbl|
tables.each do |tbl|
a = ['CHARACTER SET utf8 COLLATE utf8_unicode_ci']
b = []
model = mapping[tbl] || tbl.to_s.classify.constantize
model.columns.each do |col|
type = col.sql_type
nullable = col.null ? '' : ' NOT NULL'
default = col.default ? " DEFAULT '#{col.default}'" : ''
case type
when /varchar/
a << "CHANGE #{col.name} #{col.name} VARBINARY(#{col.limit})"
b << "CHANGE #{col.name} #{col.name} VARCHAR(#{col.limit}) CHARACTER SET utf8 COLLATE utf8_unicode_ci#{nullable}#{default}"
when /text/
a << "CHANGE #{col.name} #{col.name} BLOB"
b << "CHANGE #{col.name} #{col.name} TEXT CHARACTER SET utf8 COLLATE utf8_unicode_ci#{nullable}#{default}"
end
end unless model == true
conn.run_sql "ALTER TABLE #{tbl} #{a.join(', ')}"
conn.run_sql "ALTER TABLE #{tbl} #{b.join(', ')}" if b.present?
end
puts Time.now
puts 'Done!'
end
@buffym

This comment has been minimized.

Copy link

buffym commented May 30, 2013

This was exactly what I needed. Thank you. I added quoting column names in this fork: https://gist.github.com/buffym/5677768 as I ran into a problem with one of my table's column names.

@tboyko

This comment has been minimized.

Copy link

tboyko commented Nov 27, 2013

Works well! I expanded on the fork by @buffym and added automatic database name detection. Makes it a little easier when converting test databases and other staged environments.

https://gist.github.com/tboyko/7680960

@tilo

This comment has been minimized.

Copy link

tilo commented May 30, 2014

Worked well!!
I added some modifications based on the fork by @tboyko

  • added 'db' namespace around the rake task
  • simplified logic around tables which don't have an active record model
  • added list of tables which will be completely ignored

https://gist.github.com/tilo/4b477194ad0a72ff04d3

@rjhancock

This comment has been minimized.

Copy link

rjhancock commented Aug 31, 2014

Added transaction support as well as recovery in case something fails mid conversion.

https://gist.github.com/rjhancock/7372f4517ae11ff0b79b

@brand-it

This comment has been minimized.

Copy link

brand-it commented Jul 25, 2016

Wow thank you for doing this. I was not looking forward to building this code my self.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.