Skip to content

Instantly share code, notes, and snippets.

@schovi
Created October 29, 2021 12:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save schovi/27b577b8e08d669bc40a51c81fb68c53 to your computer and use it in GitHub Desktop.
Save schovi/27b577b8e08d669bc40a51c81fb68c53 to your computer and use it in GitHub Desktop.
# Adds +pluck_each+ to a model: it plucks the requested columns in id-ordered
# batches and yields one row at a time, so a large result set never has to be
# held in memory all at once.
#
# Paging is keyed on the "id" column: every query asks for rows whose id is
# at least "last seen id + 1", starting from 1. Ids are therefore expected to
# respond to +#+ and rows with an id below 1 are not visited.
module PluckInBatches
  extend ActiveSupport::Concern

  # Column used both for ordering and as the paging cursor.
  ID_COLUMN = "id".freeze

  class_methods do
    # Yields the values of +columns+ for every row, fetched batch by batch.
    #
    # @param columns [Array<String, Symbol>] column names (nested arrays are flattened)
    # @param batch [Integer] maximum number of rows fetched per query
    # @param limit [Integer, nil] maximum number of rows yielded in total;
    #   defaults to the limit of the current scope, if there is one
    # @yield the plucked values of one row, one block argument per requested column
    # @raise [RuntimeError] when no block is given or no column is requested
    def pluck_each(*columns, batch: 1000, limit: _pluck_each_scope_limit, &block)
      raise "Block must be provided" unless block

      columns = columns.flatten.map(&:to_s)
      raise "There must be at least one column to pluck" if columns.empty?

      # Position of the id column among the requested columns (nil when absent).
      id_index = columns.index(ID_COLUMN)

      # The id is always needed as the paging cursor. When the caller did not
      # ask for it, append it to the END of the list; it is stripped again
      # before yielding.
      columns.push(ID_COLUMN) unless id_index

      _pluck_each_perform(
        batch: batch,
        columns: columns,
        id_index: id_index,
        limit: limit,
        &block
      )
    end

    # Limit carried by the current scope, or nil when there is no scope/limit.
    def _pluck_each_scope_limit
      self.current_scope&.limit_value
    end

    # Runs the batched queries and yields every row.
    #
    # @param batch [Integer] maximum rows per query
    # @param columns [Array<String>] columns to pluck; always contains the id column
    # @param id_index [Integer, nil] index of the id column the caller asked for,
    #   or nil when it was appended only for paging
    # @param limit [Integer, nil] total row cap, nil for no cap
    # @return [nil]
    def _pluck_each_perform(batch:, columns:, id_index:, limit:)
      with_id = !id_index.nil?
      # With a single column, pluck returns a flat array of values, not rows.
      only_id = with_id && columns.length == 1
      # An appended id column always sits in the last position.
      id_index ||= columns.length - 1

      scope = self.order(ID_COLUMN => :asc)
      id_column_arel = arel_table[ID_COLUMN]

      min_id = 1
      total = 0

      loop do
        batch_limit = batch
        if limit
          remaining = limit - total
          break if remaining == 0

          batch_limit = remaining if remaining < batch
        end

        items = scope.limit(batch_limit)
                     .where(id_column_arel.gteq(min_id))
                     .pluck(*columns)

        # Nothing left to read. Without this guard a limit larger than the
        # number of rows would go on to dereference a nil "last row".
        break if items.empty?

        # Read the cursor BEFORE yielding: stripping the helper id below
        # mutates the row arrays, after which the id can no longer be found.
        last_id = only_id ? items.last : items.last[id_index]

        items.each do |item|
          item.pop unless with_id # drop the id that was added only for paging
          yield(*item)
        end

        total += items.length

        # A short batch means the table is exhausted, with or without a limit.
        break if items.length < batch_limit

        min_id = last_id + 1
      end
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment