I ran the following code to benchmark on my local machine:
require 'benchmark'
require 'benchmark/memory'
class OldTest
# NOTE: the code in this class was originally written by @febeling here:
# https://github.com/rails/rails/pull/33954/commits/76f9b5ece6ad833351275445ab9f9d1c886dc293#diff-faefed8fb304ae45721a2645df0e1789R166
def difference(a, b)
set_a = as_set(a)
set_b = as_set(b)
from_set(set_a - set_b)
end
def intersection(a, b)
set_a = as_set(a)
set_b = as_set(b)
from_set(set_a & set_b)
end
def as_set(records)
records.zip(occurrences(records))
end
def from_set(record_set)
record_set.map(&:first)
end
def occurrences(array)
counts = Hash.new(0)
array.map do |object|
counts[object] += 1
end
end
end
class NewTest
def difference(a, b)
set_b = occurrences(b)
a.reject do |object|
occurrence?(set_b, object)
end
end
def intersection(a, b)
set_b = occurrences(b)
a.select do |object|
occurrence?(set_b, object)
end
end
def occurrences(array)
array.each_with_object(Hash.new(0)) do |object, counts|
counts[object] += 1
end
end
def occurrence?(set, object)
set[object] > 0 && set[object] -= 1
end
end
old_test = OldTest.new
new_test = NewTest.new
c = [1, 2, 2, 2, 3, 4, 4, 4, 4, 4, 5, 5, 4, 2, 2, 3]
d = [3, 3, 3, 4, 4, 5, 5, 6, 3, 3, 3, 3, 7, 3, 7, 7]
n = 150_000_000
one_hundred = 100
ten_thousand = 10_000
puts "\n-------------------- DIFFERENCE --------------------\n"
Benchmark.bmbm do |x|
x.report('NEW') { new_test.difference(c, d) * n }
x.report('OLD') { old_test.difference(c, d) * n }
end
puts "\n-------------------- INTERSECTION --------------------\n"
Benchmark.bmbm do |x|
x.report('NEW') { new_test.intersection(c, d) * n }
x.report('OLD') { old_test.intersection(c, d) * n }
end
puts "\n-------------------- MEMORY --------------------\n"
puts "\n-------- 10,000x array size\n"
Benchmark.memory do |x|
x.report('NEW (difference)') { new_test.difference(c * ten_thousand, d * ten_thousand) }
x.report('OLD (difference)') { old_test.difference(c * ten_thousand, d * ten_thousand) }
x.compare!
end
Benchmark.memory do |x|
x.report('NEW (intersection)') { new_test.difference(c * ten_thousand, d * ten_thousand) }
x.report('OLD (intersection)') { old_test.difference(c * ten_thousand, d * ten_thousand) }
x.compare!
end
puts "\n-------- 100x array size\n"
Benchmark.memory do |x|
x.report('NEW (difference)') { new_test.difference(c * one_hundred, d * one_hundred) }
x.report('OLD (difference)') { old_test.difference(c * one_hundred, d * one_hundred) }
x.compare!
end
Benchmark.memory do |x|
x.report('NEW (intersection)') { new_test.intersection(c * one_hundred, d * one_hundred) }
x.report('OLD (intersection)') { old_test.intersection(c * one_hundred, d * one_hundred) }
x.compare!
end
-------------------- DIFFERENCE --------------------
Rehearsal ---------------------------------------
NEW 4.930000 8.080000 13.010000 ( 14.493172)
OLD 5.190000 9.570000 14.760000 ( 16.182891)
----------------------------- total: 27.770000sec
user system total real
NEW 4.050000 3.650000 7.700000 ( 8.085193)
OLD 4.000000 3.530000 7.530000 ( 7.799358)
-------------------- INTERSECTION --------------------
Rehearsal ---------------------------------------
NEW 2.950000 4.670000 7.620000 ( 8.145647)
OLD 2.550000 3.380000 5.930000 ( 6.247103)
----------------------------- total: 13.550000sec
user system total real
NEW 2.080000 1.620000 3.700000 ( 3.710833)
OLD 2.310000 1.790000 4.100000 ( 4.123227)
-------------------- MEMORY --------------------
-------- 10,000x array size
Calculating -------------------------------------
NEW (difference) 3.582M memsize ( 0.000 retained)
4.000 objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
OLD (difference) 19.743M memsize ( 0.000 retained)
320.012k objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
Comparison:
NEW (difference): 3581960 allocated
OLD (difference): 19742528 allocated - 5.51x more
Calculating -------------------------------------
NEW (intersection) 3.582M memsize ( 0.000 retained)
4.000 objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
OLD (intersection) 19.743M memsize ( 0.000 retained)
320.012k objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
Comparison:
NEW (intersection): 3581960 allocated
OLD (intersection): 19742528 allocated - 5.51x more
-------- 100x array size
Calculating -------------------------------------
NEW (difference) 37.808k memsize ( 0.000 retained)
4.000 objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
OLD (difference) 199.976k memsize ( 0.000 retained)
3.212k objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
Comparison:
NEW (difference): 37808 allocated
OLD (difference): 199976 allocated - 5.29x more
Calculating -------------------------------------
NEW (intersection) 38.808k memsize ( 0.000 retained)
4.000 objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
OLD (intersection) 190.216k memsize ( 0.000 retained)
3.212k objects ( 0.000 retained)
0.000 strings ( 0.000 retained)
Comparison:
NEW (intersection): 38808 allocated
OLD (intersection): 190216 allocated - 4.90x more