Skip to content

Instantly share code, notes, and snippets.

@joshuawscott
Last active December 18, 2015 09:29
Show Gist options
  • Save joshuawscott/5761300 to your computer and use it in GitHub Desktop.
Save joshuawscott/5761300 to your computer and use it in GitHub Desktop.
Optimized Binary Search for a sorted array - returns the index of the element. 6x faster than Array#bsearch for arrays of strings.
class Array
# Returns the index number of a matching element.
# The returned element is not guaranteed.
# target must implement #<=>
# example:
# arr = [2,3,5,7,11,13,17,19,23,29]
# arr.bsearch_index(2) => 0
# arr.bsearch_index(23) => 8
# arr.bsearch_index(10) => nil
# nil always is nil:
# arr = ['a','b',nil]
# arr.bsearch_index(nil) => nil
def bsearch_index(target)
low = 0; high = length - 1
until low > high
mid = low + ((high - low) / 2)
cmp = target.<=>(self[mid])
if cmp == 1
high = mid - 1
elsif cmp == -1
low = mid + 1
elsif cmp == 0
return mid
else
return nil
end
end
return nil
end
end
require 'benchmark'
sparse_array = [] #few dups
1_000_000.times { sparse_array << rand(100_000_000) }
sparse_array.sort!
mid_array = [] # many dups
1_000_000.times { mid_array << rand(1_000_000) }
mid_array.sort!
dense_array = [] # all dups
1_000_000.times { dense_array << rand(1_000) }
dense_array.sort!
sparse_search_array = []
mid_search_array = []
dense_search_array = []
500_000.times { sparse_search_array << sparse_array[rand(1_000_000)] ; sparse_search_array << rand(100_000_000) }
500_000.times { mid_search_array << mid_array[rand(1_000_000)] ; mid_search_array << rand(100_000_000) }
500_000.times { dense_search_array << dense_array[rand(1_000_000)] ; dense_search_array << rand(100_000_000) }
string_array = ('aaaa'..'zzzz').to_a
Benchmark.bmbm do |x|
x.report("bsearch_index sparse") { 1_000_000.times { |i| sparse_array.bsearch_index(sparse_search_array[i]) } }
x.report("bsearch any sparse") { 1_000_000.times { |i| sparse_array.bsearch { |e| e <=> sparse_search_array[i]} } }
x.report("bsearch min sparse") { 1_000_000.times { |i| sparse_array.bsearch { |e| e == sparse_search_array[i]} } }
x.report("bsearch_index mid") { 1_000_000.times { |i| mid_array.bsearch_index(mid_search_array[i]) } }
x.report("bsearch any mid") { 1_000_000.times { |i| mid_array.bsearch { |e| e <=> mid_search_array[i]} } }
x.report("bsearch min mid") { 1_000_000.times { |i| mid_array.bsearch { |e| e == mid_search_array[i]} } }
x.report("bsearch_index dense") { 1_000_000.times { |i| dense_array.bsearch_index(dense_search_array[i]) } }
x.report("bsearch any dense") { 1_000_000.times { |i| dense_array.bsearch { |e| e <=> dense_search_array[i]} } }
x.report("bsearch min dense") { 1_000_000.times { |i| dense_array.bsearch { |e| e == dense_search_array[i]} } }
x.report("bsearch_index strings") { 1_000_000.times { |i| dense_array.bsearch_index(string_array[i]) } }
x.report("bsearch any strings") { 1_000_000.times { |i| dense_array.bsearch { |e| e <=> string_array[i]} } }
x.report("bsearch min strings") { 1_000_000.times { |i| dense_array.bsearch { |e| e == string_array[i]} } }end
# Returns:
user system total real
bsearch_index sparse 3.430000 0.000000 3.430000 ( 3.437059)
bsearch any sparse 1.950000 0.000000 1.950000 ( 1.957959)
bsearch min sparse 1.440000 0.000000 1.440000 ( 1.442488)
bsearch_index mid 3.130000 0.010000 3.140000 ( 3.126290)
bsearch any mid 1.990000 0.000000 1.990000 ( 1.993189)
bsearch min mid 1.460000 0.000000 1.460000 ( 1.459362)
bsearch_index dense 2.280000 0.000000 2.280000 ( 2.284337)
bsearch any dense 2.000000 0.000000 2.000000 ( 2.001197)
bsearch min dense 1.450000 0.000000 1.450000 ( 1.452665)
bsearch_index strings 0.560000 0.000000 0.560000 ( 0.561603)
bsearch any strings 3.040000 0.000000 3.040000 ( 3.039882)
bsearch min strings 3.450000 0.000000 3.450000 ( 3.454454)
require 'test/unit'
class TestBsearchIndex < Test::Unit::TestCase
def test_empty_array
assert_nil([].bsearch_index('myString'))
end
def test_single_element_matching_array
assert_equal(0, ['myString'].bsearch_index('myString'))
end
def test_single_element_nonmatching_array
assert_nil(['myString'].bsearch_index('Other String'))
end
def test_case_sensitive
assert_nil(['mystring'].bsearch_index('MyString'))
end
def test_longer_array
array = %w{0 a aa aaa ab abb b d q y}
assert_equal(0, array.bsearch_index('0'))
assert_equal(9, array.bsearch_index('y'))
assert_equal(1, array.bsearch_index('a'))
assert_equal(6, array.bsearch_index('b'))
assert_nil(array.bsearch_index('1'))
assert_nil(array.bsearch_index('z'))
end
def test_integer_array
array = [2,3,5,7,11,13,17,23]
assert_equal 0, array.bsearch_index(2)
assert_equal 7, array.bsearch_index(23)
assert_nil array.bsearch_index(1)
assert_nil array.bsearch_index(8)
end
def test_nil
array = ["01", "01", "02", "02", "03", "03", "04", "04", "05", "05", "07", "07", "08", "08", "09", "09", "10", "10", "11", "11", "12", "12", "13", "13", "14", "14", nil, nil]
assert_nil array.bsearch_index(nil)
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment