conorgriffin/testing.rb

## testing.rb
require 'set'

# NOTE: This requires the benchmark-ips gem
# In case you don't want to install this gem, here's sample output of this file

# Calculating -------------------------------------
# Old 1:         9 i/100ms
# New 1:         4 i/100ms
# New 2:         8 i/100ms
# New 3:         8 i/100ms
# New 4:         8 i/100ms
# New 5:         1 i/100ms
# -------------------------------------------------
# Old 1:       98.1 (±3.1%) i/s -        495 in   5.051814s
# New 1:       47.4 (±10.6%) i/s -        236 in   5.017234s
# New 2:       78.7 (±8.9%) i/s -        392 in   5.030907s
# New 3:       87.5 (±4.6%) i/s -        440 in   5.042861s
# New 4:       84.3 (±5.9%) i/s -        424 in   5.049878s
# New 5:        6.7 (±0.0%) i/s -         34 in   5.067592s
#
# Comparison:
#     Old 1::       98.1 i/s
# New 3::       87.5 i/s - 1.12x slower
# New 4::       84.3 i/s - 1.16x slower
# New 2::       78.7 i/s - 1.25x slower
# New 1::       47.4 i/s - 2.07x slower
# New 5::        6.7 i/s - 14.55x slower

# required gems
require 'benchmark/ips'
require 'test/unit'

class Array
  # The original method we're looking to improve upon
  # The method is intended to clean up an array of strings by separating each string
  # into individual words and then generating a new Array consisting of only unique
  # words.  The idea is to improve upon the original method in performance and/or
  # memory consumption
  def clean_up_old
    self.join(' ').split().uniq
  end

  # This one uses a Set as Sets don't allow duplicates, it removes the need for uniq
  # on the output array.  This one is slower than the original, probably because the
  # merging of Sets for each element in the original Array takes more time than just
  # calling uniq once on the output Array at the end.  However, this should use less
  # memory than the original as the Set will only contain unique words from the
  # input array meaning it will be a subset of the input array.
  def clean_up_new_1
    unique = Set.new
    self.each {
        |words|
      unique.merge(words.split)
    }
    unique.to_a
  end

  # This one uses a Hash which also does not allow duplicates (keys) so I'm using it
  # to store each unique word as a key and setting the value for that key to nil.
  # This should use up less memory again when compared to the original but it's still
  # slower
  def clean_up_new_2
    unique = Hash.new
    self.each {
        |words|
      words.split.each {
          |word|
        unique[word] = nil
      }
    }
    unique.keys
  end

  # This one uses the Array method 'flat_map' which returns an Enumerator for the
  # input Array's elements and splits each String entry, then returns only the
  # unique entries from the result.  This is close in performance to the original
  # but in my opinion it is less readable although it may use less memory.
  def clean_up_new_3
    self.flat_map(&:split).uniq
  end

  # This one moves through the original Array using inject to process
  # each element containing space-separated words and appending them
  # to a new array.  Finally uniq is called to remove duplicate words
  def clean_up_new_4
    self.inject([]) {
        |array, item|
      array.push(*item.split)
    }.uniq
  end

  # This one uses inject to move through the Array, appending each string entry
  # to the previous ones with a space in between, finally splitting into a new
  # Array and using uniq to remove duplicates
  def clean_up_new_5
    self.inject() {
        |a, b|
      a += ' ' + b
    }.split.uniq
  end

end


# Build an Array of random Strings made up of 1 - 6 words from the dictionary.txt
# file (80,000 words)
def build_input_array
  array = Array.new
  dictionary = File.readlines("dictionary.txt")
  5000.times do
    str = dictionary.sample.strip
    word_count = rand(0..5)
    word_count.times do
      str += ' ' + dictionary.sample.strip
    end
    array << str
  end
  array
end

list = build_input_array

Benchmark.ips do |x|
  x.report('Old 1:') do
    list.clean_up_old
  end

  x.report('New 1:') do
    list.clean_up_new_1
  end

  x.report('New 2:') do
    list.clean_up_new_2
  end

  x.report('New 3:') do
    list.clean_up_new_3
  end

  x.report('New 4:') do
    list.clean_up_new_4
  end

  x.report('New 5:') do
    list.clean_up_new_5
  end

  x.compare!
end
	require 'set'

	# NOTE: This requires the benchmark-ips gem
	# In case you don't want to install this gem, here's sample output of this file

	# Calculating -------------------------------------
	# Old 1: 9 i/100ms
	# New 1: 4 i/100ms
	# New 2: 8 i/100ms
	# New 3: 8 i/100ms
	# New 4: 8 i/100ms
	# New 5: 1 i/100ms
	# -------------------------------------------------
	# Old 1: 98.1 (±3.1%) i/s - 495 in 5.051814s
	# New 1: 47.4 (±10.6%) i/s - 236 in 5.017234s
	# New 2: 78.7 (±8.9%) i/s - 392 in 5.030907s
	# New 3: 87.5 (±4.6%) i/s - 440 in 5.042861s
	# New 4: 84.3 (±5.9%) i/s - 424 in 5.049878s
	# New 5: 6.7 (±0.0%) i/s - 34 in 5.067592s
	#
	# Comparison:
	# Old 1:: 98.1 i/s
	# New 3:: 87.5 i/s - 1.12x slower
	# New 4:: 84.3 i/s - 1.16x slower
	# New 2:: 78.7 i/s - 1.25x slower
	# New 1:: 47.4 i/s - 2.07x slower
	# New 5:: 6.7 i/s - 14.55x slower

	# required gems
	require 'benchmark/ips'
	require 'test/unit'

	class Array
	# The original method we're looking to improve upon
	# The method is intended to clean up an array of strings by separating each string
	# into individual words and then generating a new Array consisting of only unique
	# words. The idea is to improve upon the original method in performance and/or
	# memory consumption
	def clean_up_old
	self.join(' ').split().uniq
	end

	# This one uses a Set as Sets don't allow duplicates, it removes the need for uniq
	# on the output array. This one is slower than the original, probably because the
	# merging of Sets for each element in the original Array takes more time than just
	# calling uniq once on the output Array at the end. However, this should use less
	# memory than the original as the Set will only contain unique words from the
	# input array meaning it will be a subset of the input array.
	def clean_up_new_1
	unique = Set.new
	self.each {
	\|words\|
	unique.merge(words.split)
	}
	unique.to_a
	end

	# This one uses a Hash which also does not allow duplicates (keys) so I'm using it
	# to store each unique word as a key and setting the value for that key to nil.
	# This should use up less memory again when compared to the original but it's still
	# slower
	def clean_up_new_2
	unique = Hash.new
	self.each {
	\|words\|
	words.split.each {
	\|word\|
	unique[word] = nil
	}
	}
	unique.keys
	end

	# This one uses the Array method 'flat_map' which returns an Enumerator for the
	# input Array's elements and splits each String entry, then returns only the
	# unique entries from the result. This is close in performance to the original
	# but in my opinion it is less readable although it may use less memory.
	def clean_up_new_3
	self.flat_map(&:split).uniq
	end

	# This one moves through the original Array using inject to process
	# each element containing space-separated words and appending them
	# to a new array. Finally uniq is called to remove duplicate words
	def clean_up_new_4
	self.inject([]) {
	\|array, item\|
	array.push(*item.split)
	}.uniq
	end

	# This one uses inject to move through the Array, appending each string entry
	# to the previous ones with a space in between, finally splitting into a new
	# Array and using uniq to remove duplicates
	def clean_up_new_5
	self.inject() {
	\|a, b\|
	a += ' ' + b
	}.split.uniq
	end

	end


	# Build an Array of random Strings made up of 1 - 6 words from the dictionary.txt
	# file (80,000 words)
	def build_input_array
	array = Array.new
	dictionary = File.readlines("dictionary.txt")
	5000.times do
	str = dictionary.sample.strip
	word_count = rand(0..5)
	word_count.times do
	str += ' ' + dictionary.sample.strip
	end
	array << str
	end
	array
	end

	list = build_input_array

	Benchmark.ips do \|x\|
	x.report('Old 1:') do
	list.clean_up_old
	end

	x.report('New 1:') do
	list.clean_up_new_1
	end

	x.report('New 2:') do
	list.clean_up_new_2
	end

	x.report('New 3:') do
	list.clean_up_new_3
	end

	x.report('New 4:') do
	list.clean_up_new_4
	end

	x.report('New 5:') do
	list.clean_up_new_5
	end

	x.compare!
	end