Skip to content

Instantly share code, notes, and snippets.

@ingeniarius
Created February 22, 2013 15:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ingeniarius/5014103 to your computer and use it in GitHub Desktop.
Save ingeniarius/5014103 to your computer and use it in GitHub Desktop.
Migrate data from MySQL to PostgreSQL using ActiveRecord. Based on https://github.com/braintree/mysql_to_postgresql
#!/usr/bin/env ruby
require 'logger'
require 'active_record'
require 'active_record/base'
require 'active_support'
# Send ActiveRecord's log output to a file; the script's own progress lines
# are printed with puts.
ActiveRecord::Base.logger = Logger.new("log/mysql_to_postgresql.log")

# Number of worker processes to fork. Tables are distributed across workers
# by their position in the table list modulo this value.
PROCESSES = 8

# Tables for which the sequence reset after copying is skipped.
TABLES_WITHOUT_SEQUENCES = ["schema_migrations"].freeze

# Prefix of the database.yml entries used: "<RAILS_ENV>_mysql" is the source,
# "<RAILS_ENV>_postgres" is the destination.
RAILS_ENV = "development"

# Tables to exclude when TABLES is empty.
SKIP_TABLES = %w[].freeze

# Explicit list of tables to migrate. When empty, every table reported by the
# MySQL connection (minus SKIP_TABLES) is migrated.
TABLES = %w[].freeze

db_config = YAML.load_file('config/database.yml')

# fetch instead of [] so that a missing entry raises KeyError right here in
# the parent process, rather than handing nil to establish_connection in
# every forked worker.
MYSQL_CONNECTION_CONFIG = db_config.fetch("#{RAILS_ENV}_mysql")
POSTGRES_CONNECTION_CONFIG = db_config.fetch("#{RAILS_ENV}_postgres")
# Fork PROCESSES workers. Every worker walks the same table list and handles
# only the tables whose index modulo PROCESSES equals its own process_index,
# so each table is copied by exactly one worker.
pids = []
PROCESSES.times do |process_index|
  pids << fork do
    # Both model classes are defined inside the child, so establish_connection
    # runs in each worker process rather than in the parent.
    class MysqlModelBase < ActiveRecord::Base
      establish_connection(MYSQL_CONNECTION_CONFIG)
    end

    class PostgresqlModelBase < ActiveRecord::Base
      establish_connection(POSTGRES_CONNECTION_CONFIG)

      # Overrides the ActiveRecord hook to report no protected attributes.
      # NOTE(review): presumably this lets create! mass-assign columns such as
      # the primary key -- confirm against the ActiveRecord version in use.
      def self.attributes_protected_by_default
        []
      end
    end

    tables = TABLES.empty? ? MysqlModelBase.connection.tables - SKIP_TABLES : TABLES

    tables.each_with_index do |table, table_index|
      next unless table_index % PROCESSES == process_index

      puts "[#{process_index}] Starting table: #{table}"
      id_column = table == "schema_migrations" ? "version" : "id"

      # Re-point both generic models at the current table and drop the column
      # metadata cached from the previous one.
      MysqlModelBase.table_name = table
      MysqlModelBase.reset_column_information
      PostgresqlModelBase.table_name = table
      PostgresqlModelBase.reset_column_information

      puts "[#{process_index}] -- truncate #{table}"
      PostgresqlModelBase.connection.execute %(TRUNCATE "#{table}" CASCADE;)

      # Copy batch by batch; each batch is written inside one PostgreSQL
      # transaction.
      index = 0
      MysqlModelBase.find_in_batches do |records|
        start = Time.now
        PostgresqlModelBase.transaction do
          records.each do |record|
            PostgresqlModelBase.create!(record.attributes)
          end
        end
        puts "[#{process_index}] -- #{table} - #{index} - #{records.size} per #{Time.now - start} sec"
        index += 1
      end

      # Set sequence values so that it continues after the highest copied id.
      # Identifiers are quoted (as the TRUNCATE above already does for the
      # table) so reserved-word or mixed-case names do not break the statement.
      unless TABLES_WITHOUT_SEQUENCES.include?(table)
        pg = PostgresqlModelBase.connection
        quoted_sequence = pg.quote_table_name("#{table}_#{id_column}_seq")
        pg.execute(
          "select setval('#{quoted_sequence}', " \
          "(select max(#{pg.quote_column_name(id_column)}) from #{pg.quote_table_name(table)}))"
        )
      end
    end
  end
end

# Wait for every worker and keep the pids of those that did not exit cleanly,
# so a crashed worker makes the whole run fail instead of going unnoticed.
failed_pids = pids.reject do |pid|
  _, status = Process.waitpid2(pid)
  status.success?
end

unless failed_pids.empty?
  abort "#{failed_pids.size} of #{pids.size} worker(s) failed (pids: #{failed_pids.join(', ')})"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment