Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save oelbrenner/86d2de8465ae12522b16cfe043a4c270 to your computer and use it in GitHub Desktop.
Save oelbrenner/86d2de8465ae12522b16cfe043a4c270 to your computer and use it in GitHub Desktop.
Migrate a Mongoid / MongoDB database to an ActiveRecord based SQL one. One method to convert the schema, another to migrate data. Copes with most basic data types. Some hacks to infer TEXT columns. Converts embeds_one relationships to AR aggregations. I wrote this for a one-time migration. It could be a good starting point for someone doing the …
# Migrate schema and data from Mongoid to MySQL ActiveRecord
class MongoidMysqlMigrator
def randomize_auto_increment_values(source_models, from=5500, to=10500)
source_models.each do |model|
value = rand(from-to)+from
sql = %(ALTER TABLE #{model.name.tableize} AUTO_INCREMENT=#{value})
puts sql
ActiveRecord::Base.connection.execute(sql)
end
end
# @param sources (Array) list of Mongoid document types
def migrate_data(sources)
sources.each do |source|
size = source.count
migrate_source_collection(source, source.all) if size > 0
end
sources.each do |source_model|
puts "Resolving foreign keys for #{source_model}"
target_class = define_active_record_model_class(source_model)
each_belongs_to(source_model) do |relation|
field_name = relation.foreign_key
if relation.polymorphic?
# Get the list of distinct class names from the polymophic association _type
# fields and perform an update for each
class_names = target_class.group("#{relation.name}_type").map(&"#{relation.name}_type".to_sym)
class_names.map(&:tableize).each do |rel_target_table|
sql = %(UPDATE #{target_class.table_name} t1
INNER JOIN #{rel_target_table} t2 ON
t2.mongo_id = t1.#{field_name}_mongo_id
SET t1.#{field_name} = t2.id)
#puts sql
ActiveRecord::Base.connection.execute(sql)
end
else
rel_target_table = relation.class_name.tableize
sql = %(UPDATE #{target_class.table_name} t1
INNER JOIN #{rel_target_table} t2 ON
t2.mongo_id = t1.#{field_name}_mongo_id
SET t1.#{field_name} = t2.id)
#puts sql
ActiveRecord::Base.connection.execute(sql)
end
end
# Wire up embedded docs to parent
each_embedded_relation(source_model) do |relation|
embedded_class_name = relation.class_name
embedded_class = embedded_class_name.constantize
embedded_mysql_class = define_active_record_model_class(embedded_class)
embedded_ins = embedded_in_relations(embedded_class)
raise "too many embedded_in relations" unless embedded_ins.size == 1
# There's no foreign key on the embedded doc so we infer it from
# the relation name. For polymorphic embeds, this might be something
# like parent_id or addressable_id
rel_name = embedded_ins.values.first.name
field_name = "#{rel_name}_id"
rel_target_table = source_model.name.tableize
sql = %(UPDATE #{embedded_mysql_class.table_name} SET #{field_name}=(
SELECT id FROM #{rel_target_table} WHERE
#{rel_target_table}.mongo_id = #{embedded_mysql_class.table_name}.#{field_name}_mongo_id
))
ActiveRecord::Base.connection.execute(sql)
#puts sql
end
end
end
def truncate_targets(sources)
sources.each do |source|
target_class = define_active_record_model_class(source)
target_class.destroy_all
end
end
# @param sources (Array) list of Mongoid document types
def migrate_schema(mongoid_models)
# Build MySQL schema
mongoid_models.each do |mongoid_model|
table = mongoid_model.name.tableize
# Drop and create SQL table
migrator.create_table table, :force => true
# Add mongo id column
migrator.add_column table, :mongo_id, :string
migrator.add_index table, :mongo_id
# Add data fields
each_basic_field(mongoid_model) do |field|
migrate_field_to_table(table, field)
end
# Add foreign key fields
mongoid_model.relations.each do |rel_name, relation|
if relation.relation == Mongoid::Relations::Referenced::In ||
relation.relation == Mongoid::Relations::Embedded::In
field = relation.foreign_key unless Mongoid::Relations::Embedded::In
field ||= "#{relation.name}_id"
migrator.add_column table, field, :integer
migrator.add_index table, field, :name => field
if relation.relation == Mongoid::Relations::Embedded::In && relation.polymorphic?
type_field = "#{relation.name}_type"
migrator.add_column table, type_field, :string
migrator.add_index table, type_field, :name => type_field
end
migrator.add_column table, field+"_mongo_id", :string, :length => 30
migrator.add_index table, field+"_mongo_id", :name => field+"_mongo_id"
end
if relation.relation == Mongoid::Relations::Embedded::One
# See if we need to convert this embeds_one into a composed_of
klass = relation.class_name.constantize
#if composable_types.include? [relation.inverse_class_name.constantize, klass]
prefix = relation.name
# Migrate all the embedded classes fields into the target
# table but with prefixed field names
each_basic_field(klass) do |field|
migrate_field_to_table(table, field, "#{prefix}_#{field.name}")
end
#end
end
end
# Carrier wave uploaders
each_uploader(mongoid_model) do |field, uploader_type|
migrator.add_column table, field, :string
end
end
end
private
def migrate_field_to_table(table, mongoid_field, field_name = nil)
indexed_fields = mongoid_field.options[:klass].index_options.keys.map(&:keys).map(&:first)
toSqlType = {
String => :string,
Time => :datetime,
Integer => :integer,
Boolean => :boolean,
::Money => [:decimal, :precision => 8, :scale => 2, :default => 0.00],
BigDecimal => [:decimal, :precision => 8, :scale => 3, :default => 0.000],
Date => :date,
Hash => :text,
Array => :text
}
field_name ||= mongoid_field.name
type = mongoid_field.type
sql_type = toSqlType[type]
default = mongoid_field.options[:default]
default = nil if default.is_a?(Proc) || sql_type == :text
null = default.nil?
null = true if sql_type == :text
# Switch some String colums up to text
if sql_type == :string &&
(field_name.include?('notes') ||
field_name == 'value' ||
field_name == 'content' ||
field_name.include?('_footer') ||
field_name.include?('_extra_text'))
sql_type = :text
default = nil
null = true
end
column_options = {
:default => default,
:null => null
}
if sql_type.is_a?(Array)
column_options.merge!(sql_type[1])
sql_type = sql_type.first
end
if sql_type
migrator.add_column table, field_name, sql_type, column_options
if indexed_fields.include? field_name.to_sym
migrator.add_index table, field_name, :name => field_name
end
else
puts colorize("Skipped field #{table}##{field_name} with unknown type #{type}", COLORS[:red])
end
end
def embedded_in_relations(model)
model.relations.select { |k,v| v.relation == Mongoid::Relations::Embedded::In }
end
# @param source_model (Class) the Mongoid model class
# @param collection (Array) the model instances to migrate
def migrate_source_collection(source_model, collection)
target_class = define_active_record_model_class(source_model)
#target_class.destroy_all
collection.each do |source|
target = migrate_source_document(source, target_class)
# Now step through embedded singletons and collections, converting those...
each_embedded_relation(source_model) do |relation|
puts "Migrating embedded #{source_model} #{source.id} #{relation.name}"
embedded_class = relation.class_name.constantize
if relation.relation == Mongoid::Relations::Embedded::One# &&
# composable_types.include? [source_model, embedded_class]
puts "Migrating to aggregation #{target.class} #{embedded_class}"
# it's to be converted to a composed_of
prefix = relation.name
embedded_doc = source.send(relation.name)
if embedded_doc
each_basic_field(embedded_class) do |field|
copy_field_value(embedded_doc, target, field, "#{prefix}_#{field.name}")
end
end
target.save!
else
value = source.send(relation.name)
value = [value] if relation.relation == Mongoid::Relations::Embedded::One
value = value.compact # Embedded document might not exist
migrate_source_collection(embedded_class, value) do |target|
# No foreign key back to the parent document exists for
# embedded documents so we need to set it manually here
foreign_key_name = source_model.name.tableize.singularize+'_id'
target.send("#{foreign_key_name}_mongo_id=", source.id.to_s)
end
end
end
end
end
def copy_field_value(source, target, mongoid_field, target_field_name = nil)
target_field_name ||= mongoid_field.name
type = mongoid_field.type
value = source.send(mongoid_field.name)
value = value.to_s if type == ::Money
target.send("#{target_field_name}=", value)
end
def migrate_source_document(source, target_class)
source_class = source.class
target = target_class.new
puts "Migrating #{source.class} #{source.id}..."
# Store Mongo document ID
target.mongo_id = source.id.to_s
# Copy fields
each_basic_field(source_class) do |field|
copy_field_value(source, target, field)
end
# For each relation, we store the Mongo document id in
# a temporary <key field>_mongo_id string column
each_belongs_to(source_class) do |relation|
field_name = relation.foreign_key
value = source.send(field_name).to_s
target.send("#{field_name}_mongo_id=", value)
end
each_embedded_in(source_class) do |relation|
field_name = "#{relation.name}_id"
value = source.send(relation.name).id.to_s
target.send("#{field_name}_mongo_id=", value)
if relation.polymorphic?
target.send("#{relation.name}_type=", source.send(relation.name).class.name)
end
end
each_uploader(source_class) do |field, uploader_type|
target.send("#{field}=", source.send('[]', field))
end
target.save!
target
end
# Yield block for every basic (not key, uploader etc) field
def each_basic_field(mongoid_model, &block)
mongoid_model.fields.each do |field_name, field|
unless field.name[0] == '_' || # _type or _id
field.name.ends_with?('_id') && field.type == Object || # foreign key
mongoid_model.uploaders.keys.include?(field_name.to_sym) || # uploader
field.type == Symbol # something to do with polymorphic relations e.g. commentable_field
yield(field)
end
end
end
def each_belongs_to(mongoid_model, &block)
# Add foreign key fields
mongoid_model.relations.each do |rel_name, relation|
if relation.relation == Mongoid::Relations::Referenced::In
yield(relation)
end
end
end
def each_embedded_relation(mongoid_model, &block)
# Add foreign key fields
mongoid_model.embedded_relations.each do |rel_name, relation|
yield(relation)
end
end
def each_embedded_in(mongoid_model, &block)
# Add foreign key fields
embedded_in_relations(mongoid_model).each do |rel_name, relation|
yield(relation)
end
end
def each_uploader(mongoid_model, &block)
mongoid_model.uploaders.each do |field, uploader_type|
yield(field, uploader_type)
end
end
def define_active_record_model_class(mongoid_model)
@_ar_class_cache ||= {}
arclass = @_ar_class_cache[mongoid_model]
return arclass if arclass
target_class_name = "Mysql#{mongoid_model.name}"
# Define an ActiveRecord model class
target_class = Class.new(ActiveRecord::Base) do
set_table_name mongoid_model.name.tableize
end
Kernel.const_set(target_class_name, target_class)
@_ar_class_cache[mongoid_model] = target_class
#target_class
end
def migrator
@_migrator ||= ActiveRecord::Migration.new
end
def colorize(text, color_code)
"\033[#{color_code}m#{text}\033[0m"
end
COLORS = {
:black => 30,
:red => 31,
:green => 32,
:yellow => 33,
:blue => 34,
:magenta => 35,
:cyan => 36,
:white => 37,
:warn => 36,
:error => 35
}
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment