avdi/unindent.rb

## unindent.rb
TEXT = <<EOF
   See, the interesting thing about this text
     is that while it seems like the first line defines an indent
       it's actually the last line which has the smallest indent

    there are also some blank lines

    both with and without extra spaces in them

    and it just goes on and on

       this text

    and starts to repeat itself
   See, the interesting thing about this text
     is that while it seems like the first line defines an indent
       it's actually the last line which has the smallest indent

    there are also some blank lines

    both with and without extra spaces in them

    and it just goes on and on

       this text

    and starts to repeat itself
   See, the interesting thing about this text
     is that while it seems like the first line defines an indent
       it's actually the last line which has the smallest indent

    there are also some blank lines

    both with and without extra spaces in them

    and it just goes on and on

       this text

    and starts to repeat itself

  The End.
EOF

EXPECTED_TEXT = <<EOF
 See, the interesting thing about this text
   is that while it seems like the first line defines an indent
     it's actually the last line which has the smallest indent

  there are also some blank lines

  both with and without extra spaces in them

  and it just goes on and on

     this text

  and starts to repeat itself
 See, the interesting thing about this text
   is that while it seems like the first line defines an indent
     it's actually the last line which has the smallest indent

  there are also some blank lines

  both with and without extra spaces in them

  and it just goes on and on

     this text

  and starts to repeat itself
 See, the interesting thing about this text
   is that while it seems like the first line defines an indent
     it's actually the last line which has the smallest indent

  there are also some blank lines

  both with and without extra spaces in them

  and it just goes on and on

     this text

  and starts to repeat itself

The End.
EOF

require "minitest/autorun"
require "minitest"
require "active_support/core_ext/string"
require "unindent"

class String

  # Removes the smallest common indentation from every line, without first
  # splitting the string into an array of lines.
  #
  # The first pass scans for the leading run of tabs/spaces on each line that
  # has visible content (blank and whitespace-only lines never match the
  # pattern) and keeps the shortest run found. The second pass strips that
  # run from the start of every line that begins with it.
  #
  # Returns a new String, or +self+ when no line has visible content
  # (consistent with #unindent_offsets).
  def unindent_scan
    indent_str = nil
    scan(/^[\t ]*(?=\S)/) do |s|
      indent_str = s if indent_str.nil? || s.size < indent_str.size
    end
    return self unless indent_str

    gsub(/^#{indent_str}/, "")
  end

  # This version tries to avoid a second traversal of the string by
  # saving the offset of every indented line during the scan and then
  # deleting the indent at each saved offset. However, it winds up being
  # slower. Perhaps this is a sign of how well-optimized #gsub is?
  #
  # Only lines with visible content are recorded, so whitespace-only lines
  # are left exactly as they are.
  #
  # Returns a new String, or +self+ when no line has visible content.
  def unindent_offsets
    min_indent = nil
    offsets    = []
    scan(/^[\t ]*(?=\S)/) do |s|
      offsets << Regexp.last_match.begin(0)
      # Compare against the Integer itself: Integer#size is the number of
      # bytes in the machine representation, not the indent width.
      min_indent = s.size if min_indent.nil? || s.size < min_indent
    end
    return self unless min_indent

    result = dup
    shift  = 0
    offsets.each do |offset|
      # Offsets were measured on the original string; each earlier deletion
      # moved the remaining text left by min_indent characters.
      result[offset - shift, min_indent] = ""
      shift += min_indent
    end
    result
  end
end

# Checks that each unindent implementation being compared turns TEXT into
# EXPECTED_TEXT.
#
# Inherits from Minitest::Test: MiniTest::Unit::TestCase is the deprecated
# pre-5.0 name and is not available in current Minitest releases.
class TestUnindent < Minitest::Test
  # String#unindent, expected to be provided by the required "unindent" gem.
  def test_unindent_gem
    assert_equal EXPECTED_TEXT, TEXT.unindent
  end

  # String#strip_heredoc from ActiveSupport's String core extensions.
  def test_activesupport
    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
  end

  # Two-pass scan + gsub implementation defined in this file.
  def test_scan
    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
  end

  # Single-scan, offset-recording implementation defined in this file.
  def test_offsets
    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
  end
end

require "benchmark"

# Number of times each implementation is invoked per report.
n = 10_000

# Time every implementation against the same TEXT. bmbm runs a rehearsal
# pass before the measured pass; 15 is the width of the label column.
# The return values are deliberately discarded: only the elapsed time matters.
Benchmark.bmbm(15) do |x|
  x.report("unindent gem") { n.times { TEXT.unindent } }
  x.report("activesupport") { n.times { TEXT.strip_heredoc } }
  x.report("scan") { n.times { TEXT.unindent_scan } }
  x.report("offsets") { n.times { TEXT.unindent_offsets } }
end

# ~> MiniTest::Unit::TestCase is now Minitest::Test. From -:130:in `<main>'
# >> Rehearsal ---------------------------------------------------
# >> unindent gem      0.850000   0.000000   0.850000 (  0.859678)
# >> activesupport     0.630000   0.000000   0.630000 (  0.638381)
# >> scan              0.510000   0.000000   0.510000 (  0.502865)
# >> offsets           0.710000   0.010000   0.720000 (  0.721911)
# >> ------------------------------------------ total: 2.710000sec
# >>
# >>                       user     system      total        real
# >> unindent gem      0.770000   0.000000   0.770000 (  0.773135)
# >> activesupport     0.620000   0.000000   0.620000 (  0.627442)
# >> scan              0.490000   0.000000   0.490000 (  0.497729)
# >> offsets           0.700000   0.000000   0.700000 (  0.704367)
# >> Run options: --seed 50148
# >>
# >> # Running:
# >>
# >> ....
# >>
# >> Finished in 0.001476s, 2710.3833 runs/s, 2710.3833 assertions/s.
# >>
# >> 4 runs, 4 assertions, 0 failures, 0 errors, 0 skips
	TEXT = <<EOF
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself

	The End.
	EOF

	EXPECTED_TEXT = <<EOF
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself
	See, the interesting thing about this text
	is that while it seems like the first line defines an indent
	it's actually the last line which has the smallest indent

	there are also some blank lines

	both with and without extra spaces in them

	and it just goes on and on

	this text

	and starts to repeat itself

	The End.
	EOF

	require "minitest/autorun"
	require "minitest"
	require "active_support/core_ext/string"
	require "unindent"

	class String

	  # Removes the smallest common indentation from every line, without first
	  # splitting the string into an array of lines.
	  #
	  # The first pass scans for the leading run of tabs/spaces on each line
	  # that has visible content (blank and whitespace-only lines never match
	  # the pattern) and keeps the shortest run found. The second pass strips
	  # that run from the start of every line that begins with it.
	  #
	  # Returns a new String, or +self+ when no line has visible content
	  # (consistent with #unindent_offsets).
	  def unindent_scan
	    indent_str = nil
	    scan(/^[\t ]*(?=\S)/) do |s|
	      indent_str = s if indent_str.nil? || s.size < indent_str.size
	    end
	    return self unless indent_str

	    gsub(/^#{indent_str}/, "")
	  end

	  # This version tries to avoid a second traversal of the string by
	  # saving the offset of every indented line during the scan and then
	  # deleting the indent at each saved offset. However, it winds up being
	  # slower. Perhaps this is a sign of how well-optimized #gsub is?
	  #
	  # Only lines with visible content are recorded, so whitespace-only lines
	  # are left exactly as they are.
	  #
	  # Returns a new String, or +self+ when no line has visible content.
	  def unindent_offsets
	    min_indent = nil
	    offsets    = []
	    scan(/^[\t ]*(?=\S)/) do |s|
	      offsets << Regexp.last_match.begin(0)
	      # Compare against the Integer itself: Integer#size is the number of
	      # bytes in the machine representation, not the indent width.
	      min_indent = s.size if min_indent.nil? || s.size < min_indent
	    end
	    return self unless min_indent

	    result = dup
	    shift  = 0
	    offsets.each do |offset|
	      # Offsets were measured on the original string; each earlier
	      # deletion moved the remaining text left by min_indent characters.
	      result[offset - shift, min_indent] = ""
	      shift += min_indent
	    end
	    result
	  end
	end

	# Checks that each unindent implementation being compared turns TEXT into
	# EXPECTED_TEXT.
	#
	# Inherits from Minitest::Test: MiniTest::Unit::TestCase is the deprecated
	# pre-5.0 name and is not available in current Minitest releases.
	class TestUnindent < Minitest::Test
	  # String#unindent, expected to be provided by the required "unindent" gem.
	  def test_unindent_gem
	    assert_equal EXPECTED_TEXT, TEXT.unindent
	  end

	  # String#strip_heredoc from ActiveSupport's String core extensions.
	  def test_activesupport
	    assert_equal EXPECTED_TEXT, TEXT.strip_heredoc
	  end

	  # Two-pass scan + gsub implementation (String#unindent_scan).
	  def test_scan
	    assert_equal EXPECTED_TEXT, TEXT.unindent_scan
	  end

	  # Single-scan, offset-recording implementation (String#unindent_offsets).
	  def test_offsets
	    assert_equal EXPECTED_TEXT, TEXT.unindent_offsets
	  end
	end

	require "benchmark"

	# Number of times each implementation is invoked per report.
	n = 10_000

	# Time every implementation against the same TEXT. bmbm runs a rehearsal
	# pass before the measured pass; 15 is the width of the label column.
	# The return values are deliberately discarded: only the elapsed time matters.
	Benchmark.bmbm(15) do |x|
	  x.report("unindent gem") { n.times { TEXT.unindent } }
	  x.report("activesupport") { n.times { TEXT.strip_heredoc } }
	  x.report("scan") { n.times { TEXT.unindent_scan } }
	  x.report("offsets") { n.times { TEXT.unindent_offsets } }
	end

	# ~> MiniTest::Unit::TestCase is now Minitest::Test. From -:130:in `<main>'
	# >> Rehearsal ---------------------------------------------------
	# >> unindent gem 0.850000 0.000000 0.850000 ( 0.859678)
	# >> activesupport 0.630000 0.000000 0.630000 ( 0.638381)
	# >> scan 0.510000 0.000000 0.510000 ( 0.502865)
	# >> offsets 0.710000 0.010000 0.720000 ( 0.721911)
	# >> ------------------------------------------ total: 2.710000sec
	# >>
	# >> user system total real
	# >> unindent gem 0.770000 0.000000 0.770000 ( 0.773135)
	# >> activesupport 0.620000 0.000000 0.620000 ( 0.627442)
	# >> scan 0.490000 0.000000 0.490000 ( 0.497729)
	# >> offsets 0.700000 0.000000 0.700000 ( 0.704367)
	# >> Run options: --seed 50148
	# >>
	# >> # Running:
	# >>
	# >> ....
	# >>
	# >> Finished in 0.001476s, 2710.3833 runs/s, 2710.3833 assertions/s.
	# >>
	# >> 4 runs, 4 assertions, 0 failures, 0 errors, 0 skips