jordansissel/README.md

## README.md

      
    Raw
  

              README.md
            
          
    improving ruby benchmarking

Ruby's stdlib 'benchmark' is not good, at least not for any use case I've ever had for benchmarking.
So here's some hacking I did tonight.
Benchmarking Sequel

db = Sequel.sqlite("/tmp/example.db")
db.run("CREATE TABLE foo (i INTEGER)")
Stud::Benchmark.run(5000) { db[:foo].insert(1) }
rvm 1.9.3 do ruby  -rsequel -r./benchmark.rb -e 'db = Sequel.sqlite("/tmp/example.db"); db.run("CREATE TABLE foo (i INTEGER)"); r = Stud::Benchmark.run(5000) { db[:foo].insert(1) }; puts r; r.pretty_print'
ruby 1.9.3: avg: 0.014027055783200017 stddev: 0.007770915686125679
          0.0078125...0.015625: ███████████████████████████████████████████████
            0.015625...0.03125: ███
              0.03125...0.0625: █
                0.0625...0.125: █
                  0.125...0.25: █
                    0.25...0.5: █

This was run in VirtualBox on a workstation with an SSD. A 14ms average to write one row? That seems crazy.
Running individually to verify:
>> r = Stud::Benchmark.run(1) { db[:foo].insert(1) }
=> ruby 1.9.3: avg: 0.015876558 stddev: NaN
>> r = Stud::Benchmark.run(1) { db[:foo].insert(1) }
=> ruby 1.9.3: avg: 0.015174503 stddev: NaN
>> r = Stud::Benchmark.run(1) { db[:foo].insert(1) }
=> ruby 1.9.3: avg: 0.034166633 stddev: NaN

# Now without benchmark, to verify.
>> start = Time.now; db[:foo].insert(1); Time.now - start
=> 0.01603736
>> start = Time.now; db[:foo].insert(1); Time.now - start
=> 0.016474039
>> start = Time.now; db[:foo].insert(1); Time.now - start
=> 0.013570604

Crazy slow! Still, a good show of where I'm going with this tool :)
Benchmarking tcp connect

Stud::Benchmark.run(50) { TCPSocket.new("semicomplete.com", 80).close };
% rvm 1.9.3 do ruby -rsocket -r./benchmark.rb -e 'r = Stud::Benchmark.run(50) { TCPSocket.new("semicomplete.com", 80).close }; puts r; r.pretty_print'
ruby 1.9.3: avg: 0.07132824932 stddev: 0.020798285116304303
                0.0625...0.125: ████████████████████████████████████████████████
                  0.125...0.25: ██

This next tcp example uses two different hostnames, chosen at random, to highlight latency bands:
>> hosts = [ "www.google.com", "semicomplete.com" ]
=> ["www.google.com", "semicomplete.com"]
>> r = Stud::Benchmark.run(100) { TCPSocket.new(hosts.shuffle.first, 80).close }
=> ruby 1.9.3: avg: 0.057115173200000015 stddev: 0.10279068316129422
>> r.pretty_print
            0.015625...0.03125: ██████████████████████
              0.03125...0.0625: ████
                0.0625...0.125: ████████████████████████
                    0.25...0.5: █
                     1.0...2.0: █

Note the two large groups in the 15-30ms and 62-125ms ranges.
Benchmarking http

Stud::Benchmark.run(50) { Net::HTTP.get("grokhint.herokuapp.com", "/") } 
ruby 1.9.3: avg: 0.35898339220000003 stddev: 0.7865973102824785
                  0.125...0.25: ███████████████████████████████
                    0.25...0.5: ██████████████████
                     4.0...8.0: █


## benchmark.rb
# encoding: UTF-8
# Benchmark Use Cases
#   * Compare performance of different implementations.
#     * run each implementation N times, compare runtimes (histogram, etc)

module Stud
  # Minimal benchmarking helper: time a block repeatedly and summarize the
  # per-call durations (mean, sample standard deviation, log2 histogram).
  module Benchmark
    # Call the block +iterations+ times, timing each call separately.
    #
    # iterations - how many times to invoke the block (default 1); zero or a
    #              negative count invokes nothing.
    # block      - the code under test; its return value is ignored.
    #
    # Returns a Results wrapping one duration (Float seconds) per call.
    def self.run(iterations=1, &block)
      durations = (0...iterations).map do
        started = monotonic_now
        block.call
        monotonic_now - started
      end
      return Results.new(durations)
    end # def run

    # Current reading, in Float seconds, of a clock suited to measuring
    # elapsed time. Uses the monotonic clock when the runtime provides
    # Process.clock_gettime, so wall-clock adjustments cannot skew or negate a
    # measurement; otherwise falls back to Time.now.
    def self.monotonic_now
      if Process.respond_to?(:clock_gettime) && defined?(Process::CLOCK_MONOTONIC)
        Process.clock_gettime(Process::CLOCK_MONOTONIC)
      else
        Time.now.to_f
      end
    end # def monotonic_now
    private_class_method :monotonic_now

    # A collection of timing samples with summary statistics and a text
    # histogram. Enumerable over the raw samples.
    class Results
      include Enumerable
      # Stolen from https://github.com/holman/spark/blob/master/spark
      TICKS = %w{▁ ▂ ▃ ▄ ▅ ▆ ▇ █}.freeze

      # data - Array of numeric samples (durations in seconds when built by
      #        Stud::Benchmark.run).
      def initialize(data)
        @data = data
      end # def initialize

      # Returns a String naming the running interpreter, e.g. "ruby 1.9.3".
      def environment
        # Older rubies don't have RUBY_ENGINE defined; fall back to "ruby".
        engine = defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"
        # Include the jruby version in the engine (appended with no separator).
        engine += JRUBY_VERSION if defined?(JRUBY_VERSION)

        return "#{engine} #{RUBY_VERSION}"
      end # def environment

      # Yields each sample in insertion order (the hook Enumerable builds on).
      def each(&block)
        @data.each(&block)
      end # def each

      # Count samples per power-of-two bucket.
      #
      # A positive sample v is counted under the Float range
      # (2 ** k) ... (2 ** (k + 1)) where k = floor(log2(v)). Samples that are
      # not strictly positive (zero, negatives, NaN) have no usable logarithm
      # and share the degenerate 0 ... 0 bucket.
      #
      # Returns a Hash of Range => Integer count.
      def log_distribution
        return distribution do |value|
          if value > 0
            # Keep the exponent a Float so bucket bounds are Floats even for
            # non-negative exponents (1.0...2.0 rather than 1...2).
            tick = Math.log2(value).floor.to_f
            (2 ** tick) ... (2 ** (tick + 1))
          else
            0 ... 0
          end
        end
      end # def log_distribution

      # Count samples per bucket, where the given block maps a sample to its
      # bucket key (typically a Range).
      #
      # Returns a Hash of bucket => Integer count.
      # Raises ArgumentError when no block is given.
      def distribution(&range_compute)
        raise ArgumentError, "Missing range computation block" unless block_given?

        dist = Hash.new { |h, k| h[k] = 0 }
        each do |value|
          dist[range_compute.call(value)] += 1
        end
        return dist
      end # def distribution

      # Arithmetic mean of the samples (memoized). NaN when there are no
      # samples.
      def mean
        @mean ||= sum / Float(@data.count)
        return @mean
      end # def mean

      # Sample standard deviation: square root of the sum of squared
      # deviations from the mean divided by (count - 1).
      #
      # Returns NaN when there are fewer than two samples, since the value is
      # undefined there (this keeps empty data consistent with #mean).
      def stddev
        count = @data.count
        return 0.0 / 0.0 if count < 2

        average = mean
        squares = inject(0) { |acc, value| acc + (value - average) ** 2 }
        return Math.sqrt(squares / (count - 1))
      end # def stddev

      # Total of all samples (memoized). Integer 0 when there are no samples.
      def sum
        @sum ||= inject(0) { |acc, value| acc + value }
        return @sum
      end # def sum

      # One-line summary: environment, mean and standard deviation.
      def to_s
        return "#{environment}: avg: #{mean} stddev: #{stddev}"
      end # def to_s

      # Print the log2 histogram to standard output, one row per occupied
      # bucket in ascending order. Each bar is the bucket's share of all
      # samples scaled to 50 characters and rounded up, so every occupied
      # bucket shows at least one tick. Prints a blank line for empty data.
      def pretty_print
        dist = log_distribution

        total = dist.values.inject(0) { |acc, count| acc + count }
        rows = dist.sort_by { |range, _count| range.begin }.map do |range, count|
          share = count / Float(total)
          "%30s: %s" % [range, TICKS.last * (50 * share).ceil]
        end
        puts rows.join("\n")
      end # def pretty_print
    end # class Stud::Benchmark::Results
  end # module Benchmark
end # module Stud
	# encoding: UTF-8
	# Benchmark Use Cases
	# * Compare performance of different implementations.
	# * run each implementation N times, compare runtimes (histogram, etc)

	module Stud
	  # Minimal benchmarking helper: time a block repeatedly and summarize the
	  # per-call durations (mean, sample standard deviation, log2 histogram).
	  module Benchmark
	    # Call the block +iterations+ times, timing each call separately.
	    #
	    # iterations - how many times to invoke the block (default 1); zero or a
	    #              negative count invokes nothing.
	    # block      - the code under test; its return value is ignored.
	    #
	    # Returns a Results wrapping one duration (Float seconds) per call.
	    def self.run(iterations=1, &block)
	      durations = (0...iterations).map do
	        started = monotonic_now
	        block.call
	        monotonic_now - started
	      end
	      return Results.new(durations)
	    end # def run

	    # Current reading, in Float seconds, of a clock suited to measuring
	    # elapsed time. Uses the monotonic clock when the runtime provides
	    # Process.clock_gettime, so wall-clock adjustments cannot skew or negate a
	    # measurement; otherwise falls back to Time.now.
	    def self.monotonic_now
	      if Process.respond_to?(:clock_gettime) && defined?(Process::CLOCK_MONOTONIC)
	        Process.clock_gettime(Process::CLOCK_MONOTONIC)
	      else
	        Time.now.to_f
	      end
	    end # def monotonic_now
	    private_class_method :monotonic_now

	    # A collection of timing samples with summary statistics and a text
	    # histogram. Enumerable over the raw samples.
	    class Results
	      include Enumerable
	      # Stolen from https://github.com/holman/spark/blob/master/spark
	      # Skip the assignment when TICKS already exists (e.g. this class body
	      # is evaluated more than once) to avoid an "already initialized
	      # constant" warning.
	      TICKS = %w{▁ ▂ ▃ ▄ ▅ ▆ ▇ █}.freeze unless const_defined?(:TICKS, false)

	      # data - Array of numeric samples (durations in seconds when built by
	      #        Stud::Benchmark.run).
	      def initialize(data)
	        @data = data
	      end # def initialize

	      # Returns a String naming the running interpreter, e.g. "ruby 1.9.3".
	      def environment
	        # Older rubies don't have RUBY_ENGINE defined; fall back to "ruby".
	        engine = defined?(RUBY_ENGINE) ? RUBY_ENGINE : "ruby"
	        # Include the jruby version in the engine (appended with no separator).
	        engine += JRUBY_VERSION if defined?(JRUBY_VERSION)

	        return "#{engine} #{RUBY_VERSION}"
	      end # def environment

	      # Yields each sample in insertion order (the hook Enumerable builds on).
	      def each(&block)
	        @data.each(&block)
	      end # def each

	      # Count samples per power-of-two bucket.
	      #
	      # A positive sample v is counted under the Float range
	      # (2 ** k) ... (2 ** (k + 1)) where k = floor(log2(v)). Samples that are
	      # not strictly positive (zero, negatives, NaN) have no usable logarithm
	      # and share the degenerate 0 ... 0 bucket.
	      #
	      # Returns a Hash of Range => Integer count.
	      def log_distribution
	        return distribution do |value|
	          if value > 0
	            # Keep the exponent a Float so bucket bounds are Floats even for
	            # non-negative exponents (1.0...2.0 rather than 1...2).
	            tick = Math.log2(value).floor.to_f
	            (2 ** tick) ... (2 ** (tick + 1))
	          else
	            0 ... 0
	          end
	        end
	      end # def log_distribution

	      # Count samples per bucket, where the given block maps a sample to its
	      # bucket key (typically a Range).
	      #
	      # Returns a Hash of bucket => Integer count.
	      # Raises ArgumentError when no block is given.
	      def distribution(&range_compute)
	        raise ArgumentError, "Missing range computation block" unless block_given?

	        dist = Hash.new { |h, k| h[k] = 0 }
	        each do |value|
	          dist[range_compute.call(value)] += 1
	        end
	        return dist
	      end # def distribution

	      # Arithmetic mean of the samples (memoized). NaN when there are no
	      # samples.
	      def mean
	        @mean ||= sum / Float(@data.count)
	        return @mean
	      end # def mean

	      # Sample standard deviation: square root of the sum of squared
	      # deviations from the mean divided by (count - 1).
	      #
	      # Returns NaN when there are fewer than two samples, since the value is
	      # undefined there (this keeps empty data consistent with #mean).
	      def stddev
	        count = @data.count
	        return 0.0 / 0.0 if count < 2

	        average = mean
	        squares = inject(0) { |acc, value| acc + (value - average) ** 2 }
	        return Math.sqrt(squares / (count - 1))
	      end # def stddev

	      # Total of all samples (memoized). Integer 0 when there are no samples.
	      def sum
	        @sum ||= inject(0) { |acc, value| acc + value }
	        return @sum
	      end # def sum

	      # One-line summary: environment, mean and standard deviation.
	      def to_s
	        return "#{environment}: avg: #{mean} stddev: #{stddev}"
	      end # def to_s

	      # Print the log2 histogram to standard output, one row per occupied
	      # bucket in ascending order. Each bar is the bucket's share of all
	      # samples scaled to 50 characters and rounded up, so every occupied
	      # bucket shows at least one tick. Prints a blank line for empty data.
	      def pretty_print
	        dist = log_distribution

	        total = dist.values.inject(0) { |acc, count| acc + count }
	        rows = dist.sort_by { |range, _count| range.begin }.map do |range, count|
	          share = count / Float(total)
	          "%30s: %s" % [range, TICKS.last * (50 * share).ceil]
	        end
	        puts rows.join("\n")
	      end # def pretty_print
	    end # class Stud::Benchmark::Results
	  end # module Benchmark
	end # module Stud