Skip to content

Instantly share code, notes, and snippets.

@wahlg
Created January 28, 2020 16:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wahlg/6319529135321d54719529e5dc535351 to your computer and use it in GitHub Desktop.
Save wahlg/6319529135321d54719529e5dc535351 to your computer and use it in GitHub Desktop.
CHARACTER_SET = 'utf8mb4'.freeze
COLLATION = 'utf8mb4_unicode_ci'.freeze
MAX_VARCHAR_LENGTH = 191
def fix_encoding
current_database = ActiveRecord::Base.connection.current_database
# Fix database
ActiveRecord::Base.connection.execute "ALTER DATABASE `#{current_database}` CHARACTER SET #{CHARACTER_SET} COLLATE #{COLLATION};"
# Calculate checksums on the tables so we can determine if encoding change caused problems.
table_checksums = {}
tables_to_fix = ActiveRecord::Base.connection.select_rows(
"SELECT table_name
FROM information_schema.tables
WHERE table_schema = '#{current_database}'
AND table_collation != '#{COLLATION}';"
)
tables_to_fix.each do |t|
checksum = ActiveRecord::Base.connection.select_rows("CHECKSUM TABLE `#{current_database}`.`#{t[0]}`").first
table_checksums[t] = checksum[1]
end
# Fix columns
columns_to_fix = ActiveRecord::Base.connection.select_rows(
"SELECT table_name, column_name, column_type, is_nullable, character_set_name, collation_name, column_default, data_type, character_maximum_length
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_schema = '#{current_database}'
AND collation_name is not null
AND (collation_name != '#{COLLATION}' OR character_set_name != '#{CHARACTER_SET}');"
)
columns_to_fix.each do |tc|
column_type = tc[2]
if tc[7] == 'varchar' && tc[8] > MAX_VARCHAR_LENGTH
column_type = "varchar(#{MAX_VARCHAR_LENGTH})"
end
puts "fixing #{tc[0]}.#{tc[1]} was #{tc[4]}/#{tc[5]} changing to #{CHARACTER_SET}/#{COLLATION} #{column_type}"
ActiveRecord::Base.connection.execute(
"ALTER TABLE `#{current_database}`.`#{tc[0]}`
modify `#{tc[1]}` #{column_type}
CHARACTER SET #{CHARACTER_SET}
COLLATE #{COLLATION}
#{tc[3] == 'YES' ? 'NULL' : 'NOT NULL'}
#{tc[6] ? "DEFAULT '#{tc[6]}'" : ''};"
)
end
# Fix tables
tables_to_fix.each do |t|
puts "setting default collation #{t[0]}"
ActiveRecord::Base.connection.execute "ALTER TABLE `#{current_database}`.`#{t[0]}` CONVERT TO CHARACTER SET #{CHARACTER_SET} COLLATE #{COLLATION}"
end
# Verify data has not changed
tables_to_fix.each do |t|
new_checksum = ActiveRecord::Base.connection.select_rows("CHECKSUM TABLE `#{current_database}`.`#{t[0]}`").first
puts "verifying checksum for table #{t}. was: #{table_checksums[t]} is #{new_checksum[1]}"
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment