Skip to content

Instantly share code, notes, and snippets.

@benders
Created June 15, 2009 21:25
Show Gist options
  • Save benders/130353 to your computer and use it in GitHub Desktop.
Save benders/130353 to your computer and use it in GitHub Desktop.
Example script for archiving old data
#!/usr/bin/env ruby
ENV["RAILS_ENV"] = (ENV['RAILS_ENV'] || "development").dup
require File.dirname(__FILE__) + '/../config/boot'
require RAILS_ROOT + '/config/environment'
MAX_AGE = 4.weeks
def archive(tablename, finder_sql)
raise ArgumentError if tablename.blank? or finder_sql.blank?
start_time = Time.now.to_f
deleted_rows = 0
lock_start_time = nil
c = ActiveRecord::Base.connection
archive_db_name = c.current_database + "_archive"
old_table_name = "old_#{tablename}_map"
# By creating this mapping first we reduce locking later
c.execute <<-"EOS"
CREATE TEMPORARY TABLE `old_#{tablename}_map` (`id` INT PRIMARY KEY)
SELECT id FROM `#{tablename}` WHERE #{finder_sql}
EOS
if c.select_value("SELECT COUNT(*) FROM `#{old_table_name}`").to_i > 0
min = c.select_value("SELECT MIN(id) FROM `#{old_table_name}`")
max = c.select_value("SELECT MAX(id) FROM `#{old_table_name}`")
mapped_find = "id IN (SELECT id FROM `#{old_table_name}`) AND id >= #{min} AND id <= #{max}"
ActiveRecord::Base.transaction do
lock_start_time = Time.now.to_f
before = c.select_value("SELECT COUNT(*) FROM `#{archive_db_name}`.`#{tablename}`").to_i
c.insert("INSERT INTO `#{archive_db_name}`.`#{tablename}`
SELECT * FROM `#{tablename}` WHERE #{mapped_find}")
after = c.select_value("SELECT COUNT(*) FROM `#{archive_db_name}`.`#{tablename}`").to_i
deleted_rows = c.delete("DELETE FROM `#{tablename}` WHERE #{mapped_find}")
raise unless (after - before) == deleted_rows
end
end
elapsed_time = "%0.2f" % (Time.now.to_f - start_time)
locked_time = "(%0.2fs locked)" % (Time.now.to_f - lock_start_time) if lock_start_time
puts "Archived #{deleted_rows} #{tablename} in #{elapsed_time}s #{locked_time}"
end
now = ActiveRecord::Base.default_timezone == :utc ? Time.now.utc : Time.now
cutoff_time = ActiveRecord::Base.connection.quote(now - MAX_AGE)
archive 'notifications', "updated_at < #{cutoff_time}"
archive 'replies', "delivered_at < #{cutoff_time}"
archive 'message_bodies', "message_id IN (
SELECT id from messages WHERE updated_at < #{cutoff_time}
AND state NOT IN ('new', 'offered'))"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment