Skip to content

Instantly share code, notes, and snippets.

@derwiki
Created August 21, 2012 03:44
Show Gist options
  • Save derwiki/3411320 to your computer and use it in GitHub Desktop.
Save derwiki/3411320 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
def file_variable(filename)
File.readlines(filename, 'r').first.to_i
end
TARGET_TABLE = 'action_credits'
SCHEMA = <<-SQL
CREATE TABLE IF NOT EXISTS `action_credits` (
.. omitted for brevity ..
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
SQL
def q(query, verbose=nil, dontrun=nil)
cmd = "mysql action_credit_silo --skip-column-names -e \"#{query}\""
puts cmd if verbose
`#{cmd}` unless dontrun
end
table_rows = {}
# some tables are empty, ignore them
tables = q("show tables like 'action_credit_%'").split.reject do |table|
rows = q("select max(id) from #{table}").split.last.to_i
table_rows[table] = rows unless rows == 0
rows == 0
end
table_rows.sort.each do |table, rows|
puts [table, rows].join(' : ') + " total rows"
end
start = Time.now
table_rows.sort.each do |table, rows|
puts [table, rows].join(' : ') + " total rows"
columns = q("SELECT column_name FROM information_schema.columns WHERE table_name = '#{table}'").split.reject {|col| col == 'id'}
# some of the older tables didn't have this column, but the code had been
# monkey patched to make it look like old tables just had NULL values
has_utm_campaign = columns.include? 'utm_campaign'
insert_columns = (has_utm_campaign ? columns : columns + ['utm_campaign']).join ','
select_columns = (has_utm_campaign ? columns : columns + ['NULL']).join ','
chunk_size = file_variable('CHUNK_SIZE')
(0..(rows / chunk_size)).each do |x|
lower = x * chunk_size
upper = lower + chunk_size - 1
query = <<-SQL.squish
INSERT INTO #{TARGET_TABLE} (#{insert_columns})
SELECT #{select_columns} FROM #{table}
WHERE id BETWEEN #{lower} AND #{upper}
SQL
query_start = Time.now
q(query, true)
puts "Finished at #{Time.now} in #{Time.now - query_start} seconds"
File.open('LAST_WRITTEN', 'w+') {|f| f.write([table, upper].join(','))}
delay = file_variable('DELAY')
puts "Waiting #{delay} seconds" if delay > 0
sleep delay
end
end
puts "Finished in #{((Time.now - start) / 3600).to_i} hours"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment