@janko
Last active November 1, 2020 17:26
Benchmark measuring execution time and memory allocation of different strategies for inserting many records in Sequel – 1a) individual insert, 1b) individual prepared insert, 2a) bulk insert, 2b) batched bulk insert, and 3) database insert.
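In outline, each strategy reduces to one Sequel call. A sketch only – `row`, `columns`, `rows`, and `batch` here stand in for the real values used in the full script below:

DB[:publication_logs].insert(row)                    # 1a) individual insert
prepared_insert.call(row)                            # 1b) individual prepared insert
DB[:publication_logs].import(columns, rows)          # 2a) bulk insert
rows.each_slice(1000) do |batch|                     # 2b) batched bulk insert
  DB[:publication_logs].import(columns, batch)
end
DB[:publication_logs].import(columns, dataset)       # 3)  database insert (INSERT ... SELECT)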
source "https://rubygems.org"
gem "sequel"
gem "pg"
gem "sequel_pg"
gem "memory_profiler"
Gemfile.lock:

GEM
  remote: https://rubygems.org/
  specs:
    memory_profiler (0.9.14)
    pg (1.2.3)
    sequel (5.37.0)
    sequel_pg (1.14.0)
      pg (>= 0.18.0, != 1.2.0)
      sequel (>= 4.38.0)

PLATFORMS
  ruby

DEPENDENCIES
  memory_profiler
  pg
  sequel
  sequel_pg

BUNDLED WITH
   2.1.4
Strategy                      Execution time   Objects allocated   Memory allocated
individual inserts            35.7 seconds     610k                478 MB
individual prepared inserts   23.8 seconds     480k                634 MB
bulk insert                    8.4 seconds      21k                162 MB
batched bulk insert            7.9 seconds      21k                158 MB
database insert                2.0 seconds       94                  0 MB
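The gap between strategy 3 and the rest comes down to where the rows travel. With an array of rows, `import` pulls every value through Ruby; when the second argument is a dataset, Sequel compiles the whole operation into a single INSERT ... SELECT that runs entirely inside Postgres, which is why almost nothing is allocated. A minimal sketch of the generated SQL, assuming Sequel's documented behavior that `insert_sql` mirrors `insert` but only returns the SQL string (table and column names taken from the script below):

columns = [:playlist_id, :action, :target, :created_at]
rows    = DB[:activity_logs].where(event: "publication").select(*columns)

DB[:publication_logs].insert_sql(columns, rows)
# => roughly: INSERT INTO publication_logs (playlist_id, action, target, created_at)
#             SELECT playlist_id, action, target, created_at
#             FROM activity_logs WHERE (event = 'publication')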
require "bundler/setup"
require "sequel"
require "benchmark"
require "memory_profiler"
system "createdb dataset-insert"
DB = Sequel.postgres("dataset-insert")
at_exit do
  DB.disconnect
  system "dropdb dataset-insert"
end
DB.create_table(:playlists) { primary_key :id }
DB.create_table(:users) { primary_key :id }
DB.create_table :activity_logs do
  primary_key :id
  foreign_key :playlist_id, :playlists, null: false
  foreign_key :user_id, :users
  String :event, null: false
  String :action, null: false
  String :message
  String :target
  Time :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP
end
DB[:playlists].multi_insert [{ id: 1 }, { id: 2 }]
DB[:users].multi_insert [{ id: 1 }]
# 100,000 approval logs
DB[:activity_logs].multi_insert Array.new(100_000, {
  playlist_id: 1,
  user_id: 1,
  event: "approval",
  action: "approve",
  message: "Looks good!",
})
# 100,000 publication logs
DB[:activity_logs].multi_insert Array.new(100_000, {
  playlist_id: 2,
  event: "publication",
  action: "published",
  target: "Video Wall 1",
})
DB.create_table :publication_logs do
  primary_key :id
  foreign_key :playlist_id, :playlists, null: false
  String :action, null: false
  String :target
  Time :created_at, null: false, default: Sequel::CURRENT_TIMESTAMP
end
measure = -> (name, &block) do
  execution_time = nil

  memory_report = MemoryProfiler.report do
    execution_time = Benchmark.realtime do
      # run the strategy inside a transaction that is always rolled back,
      # so every strategy starts from the same data
      DB.transaction(rollback: :always, &block)
    end
  end

  allocated_mb = memory_report.total_allocated_memsize.to_f / (1024 * 1024)
  allocations  = memory_report.total_allocated

  puts "#{name} (%.1fs, memory: %dMB, objects: %d)" % [execution_time, allocated_mb, allocations]
end
measure.("individual inserts") do
# select records we want to move
publication_logs = DB[:activity_logs].where(event: "publication")
# insert each record individually into the new table
publication_logs.each do |log|
DB[:publication_logs].insert(
playlist_id: log[:playlist_id],
action: log[:action],
target: log[:target],
created_at: log[:created_at],
)
end
# delete records from the old table
publication_logs.delete
end
measure.("individual prepared inserts") do
# select records we want to move
publication_logs = DB[:activity_logs].where(event: "publication")
prepared_insert = DB[:publication_logs].prepare :insert, :insert_publication_data,
playlist_id: :$playlist_id, action: :$action, target: :$target, created_at: :$created_at
# insert each record individually into the new table
publication_logs.each do |log|
prepared_insert.call(
playlist_id: log[:playlist_id],
action: log[:action],
target: log[:target],
created_at: log[:created_at],
)
end
# delete records from the old table
publication_logs.delete
end
measure.("bulk insert") do
# select records we want to move
publication_logs = DB[:activity_logs].where(event: "publication")
# insert each record individually into the new table
DB[:publication_logs].import [:playlist_id, :action, :target, :created_at],
publication_logs.map { |log| log.fetch_values(:playlist_id, :action, :target, :created_at) }
# delete records from the old table
publication_logs.delete
end
measure.("batched bulk insert") do
# select records we want to move
publication_logs = DB[:activity_logs].where(event: "publication")
# insert each record individually into the new table
publication_logs.each_slice(1000) do |logs|
DB[:publication_logs].import [:playlist_id, :action, :target, :created_at],
logs.map { |log| log.fetch_values(:playlist_id, :action, :target, :created_at) }
end
# delete records from the old table
publication_logs.delete
end
measure.("database insert") do
# select records we want to move
publication_logs = DB[:activity_logs].where(event: "publication")
DB[:publication_logs].import [:playlist_id, :action, :target, :created_at],
publication_logs.select(:playlist_id, :action, :target, :created_at)
# delete records from the old table
publication_logs.delete
end
DB[:activity_logs].where(event: "publication").delete

DB.alter_table :activity_logs do
  drop_column :event            # this table will only hold approval logs now
  drop_column :target           # this was specific to publication logs
  set_column_not_null :user_id  # only publication logs didn't have user id set
end