Instantly share code, notes, and snippets.

# dbenhur/subset_bm.rb Created Jul 5, 2012

StackOverflow - Ruby - Optimize the comparison of two arrays with duplicates
 #!/usr/bin/env ruby # benchmarks for http://stackoverflow.com/questions/11349544/ruby-optimize-the-comparison-of-two-arrays-with-duplicates/11352055#comment14951595_11352055 require 'rubygems' require 'multiset' require 'benchmark' small_b = "cheddaar".split(//) small_a = "cheddar".split(//) BIG_A_SZ = 10_000 big_b = (0...BIG_A_SZ).to_a big_a = big_b.dup big_a.delete_at(rand(BIG_A_SZ)) N = 100_000 BIGN = 10 def op_del_subset(a,b) aa = a.dup b.each do |letter| aa.delete_at(aa.index(letter)) rescue "" end aa.empty? end def ct_subset(a,b) a_counts = a.reduce(Hash.new(0)) { |m,v| m[v] += 1; m } b_counts = b.reduce(Hash.new(0)) { |m,v| m[v] += 1; m } a_counts.all? { |a_key,a_ct| a_ct <= b_counts[a_key] } end def ct_acc_subset(a,b) counts = a.reduce(Hash.new(0)) { |m,v| m[v] += 1; m } b.each { |v| counts[v] -= 1 } counts.values.all? { |ct| ct <= 0 } end def mset_subset(a,b) Multiset.new(a).subset? Multiset.new(b) end def slow_ct_subset(a,b) !a.find{|x| a.count(x) > b.count(x)} end cases = %w{ op_del ct ct_acc mset slow_ct } Benchmark.bm(15) do |bm| a, b = small_a, small_b cases.each do |test| meth = :"#{test}_subset" bm.report("s_#{test}") { N.times{ send(meth, a, b); send(meth, b, a) } } end a, b = big_a, big_b cases.each do |test| meth = :"#{test}_subset" bm.report("b_#{test}") { BIGN.times{ send(meth, a, b); send(meth, b, a) } } end a, b = small_a, big_b cases.each do |test| meth = :"#{test}_subset" bm.report("sb_#{test}") { BIGN.times{ send(meth, a, b); send(meth, b, a) } } end end