Skip to content

Instantly share code, notes, and snippets.

@janko janko/benchmark.rb
Last active Sep 16, 2018

Embed
What would you like to do?
Benchmarking different ways of filtering by the associated dataset in SQL
require "sequel"
require "benchmark"
system "createdb testing"
DB = Sequel.postgres("testing")
at_exit do
DB.disconnect
system "dropdb testing"
end
DB.create_table :artists do
primary_key :id
end
DB.create_table :albums do
primary_key :id
foreign_key :artist_id, :artists
Integer :year
end
class Artist < Sequel::Model
one_to_many :albums
end
class Album < Sequel::Model
end
artist_values = []
album_values = []
10_000.times do |idx|
artist_values << { id: idx }
album_values << { artist_id: idx, year: (1960..2018).to_a.sample }
end
DB[:artists].multi_insert(artist_values)
DB[:albums].multi_insert(album_values)
def benchmark(name)
duration = Benchmark.realtime { yield }
puts "#{name} (#{duration})"
end
benchmark("JOIN + DISTINCT") do
Artist
.association_join(:albums)
.where{albums[:year] >= 2000}
.select_all(:artists)
.distinct
.each { |artist| } # load all records
end
benchmark("COUNT subquery") do
new_album_count = Album
.where{year >= 2000}
.where{albums[:artist_id] =~ artists[:id]}
.select{count.function.*}
Artist
.where(new_album_count > 0)
.each { |artist| } # load all records
end
benchmark("IN subquery") do
new_artist_ids = Album
.where{year >= 2000}
.select(:artist_id)
Artist
.where(id: new_artist_ids)
.each { |artist| } # load all records
end
# >> JOIN + DISTINCT (0.012601999915204942)
# >> COUNT subquery (10.89080599998124)
# >> IN subquery (0.008620999986305833)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.