public
Created

StackOverflow - Ruby - Optimize the comparison of two arrays with duplicates

  • Download Gist
subset_bm.rb
Ruby
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
#!/usr/bin/env ruby
 
# benchmarks for http://stackoverflow.com/questions/11349544/ruby-optimize-the-comparison-of-two-arrays-with-duplicates/11352055#comment14951595_11352055
 
require 'rubygems'
require 'multiset'
require 'benchmark'
 
small_b = "cheddaar".split(//)
small_a = "cheddar".split(//)
 
BIG_A_SZ = 10_000
big_b = (0...BIG_A_SZ).to_a
big_a = big_b.dup
big_a.delete_at(rand(BIG_A_SZ))
 
N = 100_000
BIGN = 10
 
def op_del_subset(a,b)
aa = a.dup
b.each do |letter|
aa.delete_at(aa.index(letter)) rescue ""
end
aa.empty?
end
 
def ct_subset(a,b)
a_counts = a.reduce(Hash.new(0)) { |m,v| m[v] += 1; m }
b_counts = b.reduce(Hash.new(0)) { |m,v| m[v] += 1; m }
a_counts.all? { |a_key,a_ct| a_ct <= b_counts[a_key] }
end
 
def ct_acc_subset(a,b)
counts = a.reduce(Hash.new(0)) { |m,v| m[v] += 1; m }
b.each { |v| counts[v] -= 1 }
counts.values.all? { |ct| ct <= 0 }
end
 
def mset_subset(a,b)
Multiset.new(a).subset? Multiset.new(b)
end
 
def slow_ct_subset(a,b)
!a.find{|x| a.count(x) > b.count(x)}
end
 
cases = %w{ op_del ct ct_acc mset slow_ct }
 
Benchmark.bm(15) do |bm|
a, b = small_a, small_b
cases.each do |test|
meth = :"#{test}_subset"
bm.report("s_#{test}") { N.times{ send(meth, a, b); send(meth, b, a) } }
end
 
a, b = big_a, big_b
cases.each do |test|
meth = :"#{test}_subset"
bm.report("b_#{test}") { BIGN.times{ send(meth, a, b); send(meth, b, a) } }
end
 
a, b = small_a, big_b
cases.each do |test|
meth = :"#{test}_subset"
bm.report("sb_#{test}") { BIGN.times{ send(meth, a, b); send(meth, b, a) } }
end
 
end

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.