mhink/gist:72a1cf590ce22d08ad9a

## gistfile1.rb
# A heavily annotated version of this script follows below.

module EachWindow
  def each_window(n, &block)
    if self.length < n
      raise "Length must be greater than window size"
    end

    if block_given?
      (0..self.size-n).each do |ix|
        yield self.slice(ix, n)
      end
    else
      to_enum(__method__, n) do
        (self.size - n + 1)
      end
    end
  end
end

Array.include(EachWindow)

def sliceify(words, n)

  words.
    reject { |word| word.length < n }.
    map    { |word| word.
             chars.
             each_window(n).
             each_with_object(word).
             map { |slice, word|
               [slice.join(""), word]
             }}.
    flatten(1).
    sort_by { |(slice, word)| slice.downcase }
end

in_filename  = ARGV[1] || "hello_world.in"
out_filename = ARGV[2] || "hello_world.out"

begin
  infile = File.open(in_filename, "r")
  outfile = File.open(out_filename, "w+")

  words = infile.each_line.map(&:chomp)

  sliceify(words, 4).each do |(slice, word)|
    outfile.puts "#{slice} #{word}"
  end
ensure
  infile.close
  outfile.close
end

# -------------------------------------------------------------------------
# First, let's implement the idea of 'sliding' a window across a sequence
# of elements. The Ruby way is to ask 'what methods does this object respond
# to' rather than 'what is the Class of this object'- therefore, we implement
# this as a Module which can be included into any Object which responds to
# the #each, #size, and #slice method calls.
#
# You have to trust that this isn't too much like the relevant XKCD (974)- it's
# more robust to implement this as a standalone idea. The reason? Because
# we're not relying on assumptions about *what the object is*- we're relying on
# assumptions about *how the object behaves*.

module EachWindow

  # This method is an 'iterator-style' method, which can be chained
  # with other iterators like #each, #inject, and #map.
  #
  # Its first argument is the size of the window we would like to
  # iterate over. It yields each 'window' to the block it's given.
  def each_window(n, &block)

    # If we tried to chop a string of length 4 out of, say, "Foo",
    # we would get non-sensical results.  So we implement a guard
    # to prevent Weird Shit like that from happening.
    if self.length < n
      raise "Length must be greater than window size"
    end

    if block_given?

      # Here's the meat of the method. Suppose we're sliding a window
      # of length 2 over this Array:
      #
      # index:      0    1    2
      # array:    ['a', 'b', 'c'] (size is 3)
      # slice 1:  ['a', 'b']
      # slice 2:  [     'b', 'c']
      #
      # The size of the array itself is 3, and the size of our window
      # is 2. Simple reasoning shows that we can only *start* our slices
      # at the indices 0 and 1.

      (0..self.size-n).each do |ix|
        # At each point, we extract the "slice" of the Array and yield it
        # to the block provided by the calling code.
        yield self.slice(ix..ix + n)
      end

    else
      # On the other hand, if we weren't given a block to yield our slices
      # to, we use the Kernel#to_enum method to create an Enumerator object
      # which we can chain other enumerator-style methods onto. We'll see
      # this in action later.
      to_enum(__method__, n) do
        (self.size - n)
      end
    end
  end
end

# Because Array includes #each, #size, and #slice, we can "mix in" the module
# defined above, which means that any instance of Array will get a copy of the
# #each_window method. Score!

Array.include(EachWindow)

# Now, we implement the actual algorithm described in the problem.  It takes
# an array and a window size. Again, we don't want to abstract *too* much, but
# we have an invariant that we want to remove words which are shorter than
# our window size.
def sliceify(words, n)

  # First, we take the words we were given. For the sake of example, let's
  # use the array ['pooba', 'baz', 'quux'], with a window size of 4.
  words.

    # Now, we filter out words less than 4 characters long.  The return
    # value of #reject is ['pooba', 'quux'].
    reject { |word| word.length < n }.

    # Now, we are building a new array, whose elements represent each
    # element of ['pooba', 'quux'] with a transformation applied.
    map    { |word| word.
             # The #chars method turns a String like
             # "pooba" into an Array like ['p', 'o', 'o', 'b', 'a'].
             chars.

             # Our #each_window method has its chance to shine! It will yield
             # the Arrays ['p', 'o', 'o', 'b'] and ['o', 'o', 'b', 'a']. But
             # we still need to get those into a dictionary-like format.
             each_window(n).

             # Next, we *compose* our Enumerator with the #each_with_object
             # method. Basically we want to iterate over each of the Arrays
             # above and do something with the word corresponding to it.
             each_with_object(word).

             # Finally, we want to build an Array out of the result of each
             # execution of this block. Within this block, the 'slice' parameter
             # is being provided by #each_window, and the 'word' parameter is
             # being provided by #each_with_object.
             map { |slice, word|

               # And now we wave the black-magic wand. In the first execution
               # of this block, 'slice' is the Array ['p', 'o', 'o', 'b'], and
               # 'word' is the String "pooba". We #join the former back into a
               # String, and stuff it into its own little pair, along with the
               # word it was sliced out of.
               [slice.join(""), word]
             }}.

    # Whew! At this point, we have an Array of Arrays which looks like this:
    # [
    #   [
    #     ['poob', 'pooba'],
    #     ['ooba', 'pooba']
    #   ],
    #   [
    #     ['quux', 'quux']
    #   ]
    # ]
    #
    # We need to get all those two-element arrays of String back into one
    # 'unified' Array. Because this structure is self-similar in a way, we
    # can #flatten one layer of it and get what we want:
    # [
    #   ['poob', 'pooba'],
    #   ['ooba', 'pooba']
    #   ['quux', 'quux']
    # ]
    flatten(1).

    # Finally, we take each of these Arrays and sort them, using their
    # first element as the key.
    sort_by do |(slice, word)|

             # One final bugfix: apparently, upper-case characters are
             # sorted before lower-case characters: "Zot" will be sorted
             # before "alpha". So we make sure to sort these strings based
             # on their lower-case versions.
             #
             # One learns something new every day.
             slice.downcase
            end
    # So now, we have the following array:
    # [
    #   ['ooba', 'pooba']
    #   ['poob', 'pooba'],
    #   ['quux', 'quux']
    # ]
    #
    # Which is what we wanted all along.
end

# So now, on to the actual usage of all this! Let's pull the filename
# out of our script's arguments, with a fallback.
in_filename  = ARGV[1] || "hello_world.in"
out_filename = ARGV[2] || "hello_world.out"

# We use the begin..ensure block to make sure we're being good stewards
# and closing our files, even if an Exception gets thrown somewhere.
begin

  # Good practice: only open your files with the flags you actually need.
  # We open our input file with a "read-only" flag and our output file with
  # the "write-only" flag.
  infile = File.open(in_filename, "r")
  outfile = File.open(out_filename, "w+")

  # We get each line of the input file, and map the #chomp method over it
  # to remove newline characters.
  words = infile.each_line.map(&:chomp)

  # And finally, we simply use our implementation of the algorithm above...
  slices = sliceify(words, 4)

  # ...and dump each line into our output file.  (We could have been really
  # clever and made #sliceify into an enumerator-style ethod like #each_window,
  # but I figure that's pushing things a little bit.)
  slices.each do |(slice, word)|
    outfile.puts "#{slice} #{word}"
  end
ensure

  # And close our files.
  infile.close
  outfile.close
end

# fin.
	# A heavily annotated version of this script follows below.

	module EachWindow
	def each_window(n, &block)
	if self.length < n
	raise "Length must be greater than window size"
	end

	if block_given?
	(0..self.size-n).each do \|ix\|
	yield self.slice(ix, n)
	end
	else
	to_enum(__method__, n) do
	(self.size - n + 1)
	end
	end
	end
	end

	Array.include(EachWindow)

	def sliceify(words, n)

	words.
	reject { \|word\| word.length < n }.
	map { \|word\| word.
	chars.
	each_window(n).
	each_with_object(word).
	map { \|slice, word\|
	[slice.join(""), word]
	}}.
	flatten(1).
	sort_by { \|(slice, word)\| slice.downcase }
	end

	in_filename = ARGV[1] \|\| "hello_world.in"
	out_filename = ARGV[2] \|\| "hello_world.out"

	begin
	infile = File.open(in_filename, "r")
	outfile = File.open(out_filename, "w+")

	words = infile.each_line.map(&:chomp)

	sliceify(words, 4).each do \|(slice, word)\|
	outfile.puts "#{slice} #{word}"
	end
	ensure
	infile.close
	outfile.close
	end

	# -------------------------------------------------------------------------
	# First, let's implement the idea of 'sliding' a window across a sequence
	# of elements. The Ruby way is to ask 'what methods does this object respond
	# to' rather than 'what is the Class of this object'- therefore, we implement
	# this as a Module which can be included into any Object which responds to
	# the #each, #size, and #slice method calls.
	#
	# You have to trust that this isn't too much like the relevant XKCD (974)- it's
	# more robust to implement this as a standalone idea. The reason? Because
	# we're not relying on assumptions about what the object is- we're relying on
	# assumptions about how the object behaves.

	module EachWindow

	# This method is an 'iterator-style' method, which can be chained
	# with other iterators like #each, #inject, and #map.
	#
	# Its first argument is the size of the window we would like to
	# iterate over. It yields each 'window' to the block it's given.
	def each_window(n, &block)

	# If we tried to chop a string of length 4 out of, say, "Foo",
	# we would get non-sensical results. So we implement a guard
	# to prevent Weird Shit like that from happening.
	if self.length < n
	raise "Length must be greater than window size"
	end

	if block_given?

	# Here's the meat of the method. Suppose we're sliding a window
	# of length 2 over this Array:
	#
	# index: 0 1 2
	# array: ['a', 'b', 'c'] (size is 3)
	# slice 1: ['a', 'b']
	# slice 2: [ 'b', 'c']
	#
	# The size of the array itself is 3, and the size of our window
	# is 2. Simple reasoning shows that we can only start our slices
	# at the indices 0 and 1.

	(0..self.size-n).each do \|ix\|
	# At each point, we extract the "slice" of the Array and yield it
	# to the block provided by the calling code.
	yield self.slice(ix..ix + n)
	end

	else
	# On the other hand, if we weren't given a block to yield our slices
	# to, we use the Kernel#to_enum method to create an Enumerator object
	# which we can chain other enumerator-style methods onto. We'll see
	# this in action later.
	to_enum(__method__, n) do
	(self.size - n)
	end
	end
	end
	end

	# Because Array includes #each, #size, and #slice, we can "mix in" the module
	# defined above, which means that any instance of Array will get a copy of the
	# #each_window method. Score!

	Array.include(EachWindow)

	# Now, we implement the actual algorithm described in the problem. It takes
	# an array and a window size. Again, we don't want to abstract too much, but
	# we have an invariant that we want to remove words which are shorter than
	# our window size.
	def sliceify(words, n)

	# First, we take the words we were given. For the sake of example, let's
	# use the array ['pooba', 'baz', 'quux'], with a window size of 4.
	words.

	# Now, we filter out words less than 4 characters long. The return
	# value of #reject is ['pooba', 'quux'].
	reject { \|word\| word.length < n }.

	# Now, we are building a new array, whose elements represent each
	# element of ['pooba', 'quux'] with a transformation applied.
	map { \|word\| word.
	# The #chars method turns a String like
	# "pooba" into an Array like ['p', 'o', 'o', 'b', 'a'].
	chars.

	# Our #each_window method has its chance to shine! It will yield
	# the Arrays ['p', 'o', 'o', 'b'] and ['o', 'o', 'b', 'a']. But
	# we still need to get those into a dictionary-like format.
	each_window(n).

	# Next, we compose our Enumerator with the #each_with_object
	# method. Basically we want to iterate over each of the Arrays
	# above and do something with the word corresponding to it.
	each_with_object(word).

	# Finally, we want to build an Array out of the result of each
	# execution of this block. Within this block, the 'slice' parameter
	# is being provided by #each_window, and the 'word' parameter is
	# being provided by #each_with_object.
	map { \|slice, word\|

	# And now we wave the black-magic wand. In the first execution
	# of this block, 'slice' is the Array ['p', 'o', 'o', 'b'], and
	# 'word' is the String "pooba". We #join the former back into a
	# String, and stuff it into its own little pair, along with the
	# word it was sliced out of.
	[slice.join(""), word]
	}}.

	# Whew! At this point, we have an Array of Arrays which looks like this:
	# [
	# [
	# ['poob', 'pooba'],
	# ['ooba', 'pooba']
	# ],
	# [
	# ['quux', 'quux']
	# ]
	# ]
	#
	# We need to get all those two-element arrays of String back into one
	# 'unified' Array. Because this structure is self-similar in a way, we
	# can #flatten one layer of it and get what we want:
	# [
	# ['poob', 'pooba'],
	# ['ooba', 'pooba']
	# ['quux', 'quux']
	# ]
	flatten(1).

	# Finally, we take each of these Arrays and sort them, using their
	# first element as the key.
	sort_by do \|(slice, word)\|

	# One final bugfix: apparently, upper-case characters are
	# sorted before lower-case characters: "Zot" will be sorted
	# before "alpha". So we make sure to sort these strings based
	# on their lower-case versions.
	#
	# One learns something new every day.
	slice.downcase
	end
	# So now, we have the following array:
	# [
	# ['ooba', 'pooba']
	# ['poob', 'pooba'],
	# ['quux', 'quux']
	# ]
	#
	# Which is what we wanted all along.
	end

	# So now, on to the actual usage of all this! Let's pull the filename
	# out of our script's arguments, with a fallback.
	in_filename = ARGV[1] \|\| "hello_world.in"
	out_filename = ARGV[2] \|\| "hello_world.out"

	# We use the begin..ensure block to make sure we're being good stewards
	# and closing our files, even if an Exception gets thrown somewhere.
	begin

	# Good practice: only open your files with the flags you actually need.
	# We open our input file with a "read-only" flag and our output file with
	# the "write-only" flag.
	infile = File.open(in_filename, "r")
	outfile = File.open(out_filename, "w+")

	# We get each line of the input file, and map the #chomp method over it
	# to remove newline characters.
	words = infile.each_line.map(&:chomp)

	# And finally, we simply use our implementation of the algorithm above...
	slices = sliceify(words, 4)

	# ...and dump each line into our output file. (We could have been really
	# clever and made #sliceify into an enumerator-style ethod like #each_window,
	# but I figure that's pushing things a little bit.)
	slices.each do \|(slice, word)\|
	outfile.puts "#{slice} #{word}"
	end
	ensure

	# And close our files.
	infile.close
	outfile.close
	end

	# fin.