Created
August 21, 2012 03:44
-
-
Save derwiki/3411320 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
def file_variable(filename) | |
File.readlines(filename, 'r').first.to_i | |
end | |
TARGET_TABLE = 'action_credits' | |
SCHEMA = <<-SQL | |
CREATE TABLE IF NOT EXISTS `action_credits` ( | |
.. omitted for brevity .. | |
) ENGINE=InnoDB DEFAULT CHARSET=utf8; | |
SQL | |
def q(query, verbose=nil, dontrun=nil) | |
cmd = "mysql action_credit_silo --skip-column-names -e \"#{query}\"" | |
puts cmd if verbose | |
`#{cmd}` unless dontrun | |
end | |
table_rows = {} | |
# some tables are empty, ignore them | |
tables = q("show tables like 'action_credit_%'").split.reject do |table| | |
rows = q("select max(id) from #{table}").split.last.to_i | |
table_rows[table] = rows unless rows == 0 | |
rows == 0 | |
end | |
table_rows.sort.each do |table, rows| | |
puts [table, rows].join(' : ') + " total rows" | |
end | |
start = Time.now | |
table_rows.sort.each do |table, rows| | |
puts [table, rows].join(' : ') + " total rows" | |
columns = q("SELECT column_name FROM information_schema.columns WHERE table_name = '#{table}'").split.reject {|col| col == 'id'} | |
# some of the older tables didn't have this column, but the code had been | |
# monkey patched to make it look like old tables just had NULL values | |
has_utm_campaign = columns.include? 'utm_campaign' | |
insert_columns = (has_utm_campaign ? columns : columns + ['utm_campaign']).join ',' | |
select_columns = (has_utm_campaign ? columns : columns + ['NULL']).join ',' | |
chunk_size = file_variable('CHUNK_SIZE') | |
(0..(rows / chunk_size)).each do |x| | |
lower = x * chunk_size | |
upper = lower + chunk_size - 1 | |
query = <<-SQL.squish | |
INSERT INTO #{TARGET_TABLE} (#{insert_columns}) | |
SELECT #{select_columns} FROM #{table} | |
WHERE id BETWEEN #{lower} AND #{upper} | |
SQL | |
query_start = Time.now | |
q(query, true) | |
puts "Finished at #{Time.now} in #{Time.now - query_start} seconds" | |
File.open('LAST_WRITTEN', 'w+') {|f| f.write([table, upper].join(','))} | |
delay = file_variable('DELAY') | |
puts "Waiting #{delay} seconds" if delay > 0 | |
sleep delay | |
end | |
end | |
puts "Finished in #{((Time.now - start) / 3600).to_i} hours" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment