Skip to content

Instantly share code, notes, and snippets.

@manlon
Last active August 29, 2015 14:27
Show Gist options
  • Save manlon/16abbbcc715cb90ff077 to your computer and use it in GitHub Desktop.
Save manlon/16abbbcc715cb90ff077 to your computer and use it in GitHub Desktop.
# Batch finders that begin at a random record instead of the first one, so
# that repeated, time-limited runs do not always process the same leading
# records. Intended to be mixed into ActiveRecord::Base with `extend`.
module BatchesRandomStart
  # `find_in_batches`, but starting from a random position in the set of
  # records, then continuing from the beginning of the set. Gives up early
  # if `time_limit` elapses.
  #
  # @param batch_size [Integer] number of records requested per batch
  # @param time_limit [Numeric, nil] value added to the start time to form a
  #   deadline; nil disables the limit. The deadline is only checked after a
  #   batch has been yielded, so at least one batch is processed.
  # @yield [batch] each batch of records, in the order described above
  # @return [Enumerator, nil] an Enumerator over the batches when no block
  #   is given, otherwise nil
  def find_in_batches_random_start(batch_size: 10, time_limit: nil)
    relation = scoped
    unless block_given?
      # Hang the enumerator off the relation captured now rather than off
      # self, so the scope that is current at call time is the one used when
      # the enumeration actually runs.
      return relation.to_enum(:find_in_batches_random_start,
                              batch_size: batch_size, time_limit: time_limit)
    end

    total = relation.count
    return if total == 0

    # Start a random amount through the set of records. The record can be
    # missing if rows disappeared between the count and this query.
    start_id = relation.offset(rand(total)).first.try(:id)
    return unless start_id

    deadline = time_limit && (Time.now + time_limit)
    expired = -> { deadline && deadline < Time.now }

    # First pass: from the random record to the end. Second pass: start
    # again from 0, processing only records with ids less than where we
    # started.
    id_column = relation.table[relation.primary_key]
    passes = [
      [relation, start_id],
      [relation.where(id_column.lt(start_id)), 0]
    ]
    passes.each do |scope, first_id|
      scope.find_in_batches(batch_size: batch_size, start: first_id) do |batch|
        yield batch unless batch.empty?
        # `return` leaves the whole method, abandoning any remaining pass.
        return if expired.call
      end
    end
    nil
  end

  # Record-at-a-time variant of `find_in_batches_random_start`.
  #
  # @param batch_size [Integer] number of records requested per batch
  # @param time_limit [Numeric, nil] see `find_in_batches_random_start`; the
  #   limit is still checked once per batch, not once per record
  # @yield [record] each record of each batch
  # @return [Enumerator, nil] an Enumerator over the records when no block
  #   is given, otherwise nil
  def find_each_random_start(batch_size: 10, time_limit: nil, &block)
    unless block
      return scoped.to_enum(:find_each_random_start,
                            batch_size: batch_size, time_limit: time_limit)
    end

    find_in_batches_random_start(batch_size: batch_size, time_limit: time_limit) do |batch|
      batch.each(&block)
    end
  end
end
# Add the random-start finders as class-level methods on ActiveRecord::Base.
ActiveRecord::Base.extend(BatchesRandomStart)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment