ramonrails/patterns_in_array.rb

## patterns_in_array.rb
# Faker: A ruby gem (MIT) to generate test data
# https://github.com/faker-ruby/faker
require "faker"

# generate a random set of words
#
# => ["temporibus", "laborum", "et", "facilis", "assumenda", "autem", "voluptatem"]
#
# words = rand(5..10).times.collect { Faker::Lorem.word }
words = Array.new(rand(5..10)) { Faker::Lorem.word }

# let's make a random cloud of words
# =>
# [["facilis", "et"],
#  ["autem", "assumenda"],
#  ["temporibus", "tempore"],
#  ["et", "laborum"],
#  ["laborum", "temporibus"],
#  ["voluptatem", "autem"],
#  ["assumenda", "facilis"]]
#
cloud = words.zip( [Faker::Lorem.word] + words[0..-2]).shuffle

# Can you find any pattern in the data? Look closer
#
# now, the problem
# Which is the first word in the sequence?

# (1) loop through the word_cloud (array of array) to build an array
# => ah! rather long and complex loop. let's skip it.

# (2) reduce word_cloud to Hash data structure
# =>
# {"facilis"=>"et",
#  "autem"=>"assumenda",
#  "temporibus"=>"tempore",
#  "et"=>"laborum",
#  "laborum"=>"temporibus",
#  "voluptatem"=>"autem",
#  "assumenda"=>"facilis"}
#
hash = cloud.to_h

# all the words on the left of array are now keys
#
# => ["facilis", "autem", "temporibus", "et", "laborum", "voluptatem", "assumenda"]
#
hash.keys

# the ones on the right are values
#
# => ["et", "assumenda", "tempore", "laborum", "temporibus", "autem", "facilis"]
#
hash.values

# So, which is the first word in the sequence?
# The extra one in the keys :)
#
# => "voluptatem"
#
first = (hash.keys - hash.values).first

# Which is the last word in the sequence?
# the extra one in the values :)
#
# => "tempore"
#
last = (hash.values - hash.keys).first

# And, how do you get the sequence of words from this cloud?

# Solution (1)
#   * start from the first word, collect in an array
#   * loop through the collection to keep appending the next word from the sequence
#   * hash also allows instant lookup instead of looping through the array
#
def get_by_loop(first, hash)
  element = first
  sorted = [element]
  # this loop will end when we are out of elements to search
  while element
    # fetch the word next in sequence after the `element`
    value = hash[element]
    # append it below the `element`
    # do not append blanks ot nil-s
    # value && sorted.append( value) also words equally good
    sorted.append value if value
    # now get ready to search the element next in sequence for `value`
    element = value
  end

  # we have a sorted array of all words
  #
  # => ["voluptatem", "autem", "assumenda", "facilis", "et", "laborum", "temporibus", "tempore"]
  #
  sorted
end
#
# => ["voluptatem", "autem", "assumenda", "facilis", "et", "laborum", "temporibus", "tempore"]
#
get_by_loop(first, hash)

# Solution (2) => use recursion
#
def get_by_recursion(key, hash)
  key && [ key, get_by_recursion(hash[key], hash) ]
end
#
# then, flatten the result and remove nil values
#
# => ["voluptatem", ["autem", ["assumenda", ["facilis", ["et", ["laborum", ["temporibus", ["tempore", nil]]]]]]]]
# => ["voluptatem", "autem", "assumenda", "facilis", "et", "laborum", "temporibus", "tempore"]
#
get_by_recursion( first, hash).flatten.compact

#
# Which is faster? Let's check
#
# I have this handy script in my console (check my other gists)
#
#       # Usage 1: just a block
#       #   bmbm { rand(1..100) }
#
#       # Usage 2: block with a name
#       #   bmbm('abc') { rand(0..100) }
#
#       bmbm('abc') { rand(0..100) }
#       # Usage 3: hash of many blocks, each with a name
#       #   bmbm rand: ->{ rand(0..100) }, time: ->{ Time.now }
#       #   bmbm 'rand': ->{ rand(0..100) }, 'time': ->{ Time.now }
#       #   bmbm Numeric.new => ->{ rand(0..100) }, Time.new => ->{ Time.now }
#
# loop is obviously fatser because
#   * you are reducing the number of calls to the call stack
#   * variables are not getting cloned over and over again
#   * less memory is required
#
bmbm loop: ->{ get_by_loop(first, hash) }, recurse: ->{ get_by_recursion(first, hash).flatten.compact }
#
# Rehearsal -------------------------------------------
# loop      0.000011   0.000001   0.000012 (  0.000011)
# recurse   0.000016   0.000002   0.000018 (  0.000016)
# ---------------------------------- total: 0.000030sec

#               user     system      total        real
# loop      0.000010   0.000001   0.000011 (  0.000008)
# recurse   0.000018   0.000001   0.000019 (  0.000015)

# Of course, there are many optimizations that can be done on this code. Later.
# Have a nice day. Bye. 🙂
	# Faker: A ruby gem (MIT) to generate test data
	# https://github.com/faker-ruby/faker
	require "faker"

	# generate a random set of words
	#
	# => ["temporibus", "laborum", "et", "facilis", "assumenda", "autem", "voluptatem"]
	#
	# words = rand(5..10).times.collect { Faker::Lorem.word }
	words = Array.new(rand(5..10)) { Faker::Lorem.word }

	# let's make a random cloud of words
	# =>
	# [["facilis", "et"],
	# ["autem", "assumenda"],
	# ["temporibus", "tempore"],
	# ["et", "laborum"],
	# ["laborum", "temporibus"],
	# ["voluptatem", "autem"],
	# ["assumenda", "facilis"]]
	#
	cloud = words.zip( [Faker::Lorem.word] + words[0..-2]).shuffle

	# Can you find any pattern in the data? Look closer
	#
	# now, the problem
	# Which is the first word in the sequence?

	# (1) loop through the word_cloud (array of array) to build an array
	# => ah! rather long and complex loop. let's skip it.

	# (2) reduce word_cloud to Hash data structure
	# =>
	# {"facilis"=>"et",
	# "autem"=>"assumenda",
	# "temporibus"=>"tempore",
	# "et"=>"laborum",
	# "laborum"=>"temporibus",
	# "voluptatem"=>"autem",
	# "assumenda"=>"facilis"}
	#
	hash = cloud.to_h

	# all the words on the left of array are now keys
	#
	# => ["facilis", "autem", "temporibus", "et", "laborum", "voluptatem", "assumenda"]
	#
	hash.keys

	# the ones on the right are values
	#
	# => ["et", "assumenda", "tempore", "laborum", "temporibus", "autem", "facilis"]
	#
	hash.values

	# So, which is the first word in the sequence?
	# The extra one in the keys :)
	#
	# => "voluptatem"
	#
	first = (hash.keys - hash.values).first

	# Which is the last word in the sequence?
	# the extra one in the values :)
	#
	# => "tempore"
	#
	last = (hash.values - hash.keys).first

	# And, how do you get the sequence of words from this cloud?

	# Solution (1)
	# * start from the first word, collect in an array
	# * loop through the collection to keep appending the next word from the sequence
	# * hash also allows instant lookup instead of looping through the array
	#
	def get_by_loop(first, hash)
	element = first
	sorted = [element]
	# this loop will end when we are out of elements to search
	while element
	# fetch the word next in sequence after the `element`
	value = hash[element]
	# append it below the `element`
	# do not append blanks ot nil-s
	# value && sorted.append( value) also words equally good
	sorted.append value if value
	# now get ready to search the element next in sequence for `value`
	element = value
	end

	# we have a sorted array of all words
	#
	# => ["voluptatem", "autem", "assumenda", "facilis", "et", "laborum", "temporibus", "tempore"]
	#
	sorted
	end
	#
	# => ["voluptatem", "autem", "assumenda", "facilis", "et", "laborum", "temporibus", "tempore"]
	#
	get_by_loop(first, hash)

	# Solution (2) => use recursion
	#
	def get_by_recursion(key, hash)
	key && [ key, get_by_recursion(hash[key], hash) ]
	end
	#
	# then, flatten the result and remove nil values
	#
	# => ["voluptatem", ["autem", ["assumenda", ["facilis", ["et", ["laborum", ["temporibus", ["tempore", nil]]]]]]]]
	# => ["voluptatem", "autem", "assumenda", "facilis", "et", "laborum", "temporibus", "tempore"]
	#
	get_by_recursion( first, hash).flatten.compact

	#
	# Which is faster? Let's check
	#
	# I have this handy script in my console (check my other gists)
	#
	# # Usage 1: just a block
	# # bmbm { rand(1..100) }
	#
	# # Usage 2: block with a name
	# # bmbm('abc') { rand(0..100) }
	#
	# bmbm('abc') { rand(0..100) }
	# # Usage 3: hash of many blocks, each with a name
	# # bmbm rand: ->{ rand(0..100) }, time: ->{ Time.now }
	# # bmbm 'rand': ->{ rand(0..100) }, 'time': ->{ Time.now }
	# # bmbm Numeric.new => ->{ rand(0..100) }, Time.new => ->{ Time.now }
	#
	# loop is obviously fatser because
	# * you are reducing the number of calls to the call stack
	# * variables are not getting cloned over and over again
	# * less memory is required
	#
	bmbm loop: ->{ get_by_loop(first, hash) }, recurse: ->{ get_by_recursion(first, hash).flatten.compact }
	#
	# Rehearsal -------------------------------------------
	# loop 0.000011 0.000001 0.000012 ( 0.000011)
	# recurse 0.000016 0.000002 0.000018 ( 0.000016)
	# ---------------------------------- total: 0.000030sec

	# user system total real
	# loop 0.000010 0.000001 0.000011 ( 0.000008)
	# recurse 0.000018 0.000001 0.000019 ( 0.000015)

	# Of course, there are many optimizations that can be done on this code. Later.
	# Have a nice day. Bye. 🙂