thdaraujo/tc_tweetstorm.rb

## tc_tweetstorm.rb

#unit test
#how to run: $ ruby tc_tweetstorm.rb

require_relative "tweetstorm"
require "minitest/autorun"

# Fixture-driven checks for TweetStorm.storm and TweetStorm.storm_without_index.
# Inputs and expected outputs are read from the ./corpus directory.
class TestTweetStorm < Minitest::Test
  # Loads every corpus and its expected result before each test.
  def setup
    @empty_corpus = ""
    @small_corpus = read_fixture("./corpus/small_corpus.txt")
    @large_corpus = read_fixture("./corpus/large_corpus.txt")

    @empty_corpus_result = ""
    @small_corpus_result = read_fixture("./corpus/small_corpus_result.txt")
    @large_corpus_result = read_fixture("./corpus/large_corpus_result.txt")

    @small_corpus_without_index_result = read_fixture("./corpus/small_corpus_without_index_result.txt")
    @large_corpus_without_index_result = read_fixture("./corpus/large_corpus_without_index_result.txt")
  end

  # --- storm_without_index -------------------------------------------------

  def test_with_empty_corpus_without_index
    actual = TweetStorm.storm_without_index(@empty_corpus, 140)
    assert_equal @empty_corpus_result, actual
  end

  def test_with_small_corpus_without_index
    tweets = TweetStorm.storm_without_index(@small_corpus, 140)
    assert_equal @small_corpus_without_index_result, tweets.join("\n")
  end

  def test_with_large_corpus_without_index
    tweets = TweetStorm.storm_without_index(@large_corpus, 140)
    assert_equal @large_corpus_without_index_result, tweets.join("\n")
  end

  # --- storm ---------------------------------------------------------------

  def test_with_empty_corpus
    actual = TweetStorm.storm(@empty_corpus, 140)
    assert_equal @empty_corpus_result, actual
  end

  def test_with_small_corpus
    tweets = TweetStorm.storm(@small_corpus, 140)
    assert_equal @small_corpus_result, tweets.join("\n")
  end

  def test_with_large_corpus
    tweets = TweetStorm.storm(@large_corpus, 140)
    assert_equal @large_corpus_result, tweets.join("\n")
  end

  private

  # Reads a fixture file and removes its trailing newline so it can be
  # compared against tweets joined with "\n".
  def read_fixture(path)
    File.read(path).chomp("\n")
  end
end

## tweetstorm.rb
#!/usr/bin/env ruby

# Splits a long text into a numbered series of tweets (a "tweetstorm").
#
# Both entry points return "" for nil/empty text and otherwise an Array of
# Strings, each prefixed with "<index>/<total> ".
class TweetStorm
  # Cuts text into chunks WITHOUT reserving room for the "index/count"
  # prefix, so a prefixed tweet may be longer than bucket_size.
  #
  # Each chunk is up to bucket_size characters followed by one boundary:
  # either a non-word character or the end of the text. Chunks are stripped
  # of surrounding whitespace before the prefix is added.
  #
  # NOTE(review): a run longer than bucket_size that contains no non-word
  # character cannot be matched from its start, so its leading part is
  # skipped by the scan. "." does not match "\n", so a newline can only act
  # as a boundary.
  #
  # @param text [String, nil] text to split
  # @param bucket_size [Integer] maximum chunk length before the boundary
  # @return [String, Array<String>] "" for nil/empty text, else the tweets
  def self.storm_without_index(text, bucket_size)
    return "" if text.nil? || text.empty?

    # Use bucket_size instead of a hard-coded 140, and accept the end of the
    # text (\z) as a boundary so a tail that does not end in punctuation or
    # whitespace is no longer dropped.
    chunk_pattern = /.{1,#{Integer(bucket_size)}}(?:\W|\z)/
    tweets = text.scan(chunk_pattern).map(&:strip)
    total = tweets.size
    tweets.each_with_index.map { |tweet, index| "#{index + 1}/#{total} #{tweet}" }
  end

  # Groups whitespace-separated words into tweets while reserving an
  # estimated amount of room for the "index/count" prefix.
  #
  # A single word longer than the available room still gets a tweet of its
  # own; words are never split.
  #
  # @param text [String, nil] text to split
  # @param bucket_size [Integer] target maximum tweet length
  # @return [String, Array<String>] "" for nil/empty text, else the tweets
  def self.storm(text, bucket_size)
    return "" if text.nil? || text.empty?

    padding = approximate_padding_size(text, bucket_size)
    words = text.scan(/\S+/)

    # Running totals for the slice being built. They live in local
    # variables because Enumerable#slice_before no longer accepts an
    # initial_state argument (passing one together with a block raises
    # ArgumentError on current Rubies).
    letters = 0 # characters of the words in the current slice
    slots = 1   # words in the slice (+1 in the very first slice): room for spaces
    slices = words.slice_before do |word|
      slots += 1
      letters += word.length
      # Start a new slice before words + spaces + prefix reach bucket_size.
      if letters + slots + padding >= bucket_size
        slots = 1
        letters = word.length # this word opens the new slice
        true
      else
        false
      end
    end.to_a # materialise once: the closure state must not be replayed

    total = slices.size
    slices.each_with_index.map do |slice, index|
      "#{index + 1}/#{total} #{slice.join(' ')}"
    end
  end

  # Helpers. The original `private` keyword never applied to these
  # `def self.` methods, so they remain callable from outside, as before.

  # Estimates the number of tweets as twice the integer quotient
  # text.size / bucket_size (the author's First-Fit rationale: no more than
  # twice the optimal number of buckets).
  #
  # @return [Integer]
  def self.approximate_tweet_count(text, bucket_size)
    (text.size / bucket_size) * 2
  end

  # Room reserved for the "index/count " prefix: the digits of the estimated
  # tweet count twice (index and count) plus 2 for "/" and the space,
  # e.g. "1/150 test blablabla ...".
  #
  # @return [Integer]
  def self.approximate_padding_size(text, bucket_size)
    approximate_tweet_count(text, bucket_size).to_s.size * 2 + 2
  end
end


#=== script =====

# Command-line entry point: joins the arguments into one text and prints the
# resulting tweets. Guarded so that merely loading this file (for example via
# require_relative from the unit test) does not read ARGV or print anything.
if __FILE__ == $PROGRAM_NAME
  text = ARGV.join(' ')
  result = TweetStorm.storm(text, 140)
  puts "\n\n"
  puts result
end

	#unit test
	#how to run: $ ruby tc_tweetstorm.rb

	require_relative "tweetstorm"
	require "minitest/autorun"

	# Fixture-driven checks for TweetStorm.storm and TweetStorm.storm_without_index.
	# Inputs and expected outputs are read from the ./corpus directory.
	class TestTweetStorm < Minitest::Test
	  # Loads every corpus and its expected result before each test.
	  def setup
	    @empty_corpus = ""
	    @small_corpus = read_fixture("./corpus/small_corpus.txt")
	    @large_corpus = read_fixture("./corpus/large_corpus.txt")

	    @empty_corpus_result = ""
	    @small_corpus_result = read_fixture("./corpus/small_corpus_result.txt")
	    @large_corpus_result = read_fixture("./corpus/large_corpus_result.txt")

	    @small_corpus_without_index_result = read_fixture("./corpus/small_corpus_without_index_result.txt")
	    @large_corpus_without_index_result = read_fixture("./corpus/large_corpus_without_index_result.txt")
	  end

	  # --- storm_without_index -------------------------------------------------

	  def test_with_empty_corpus_without_index
	    actual = TweetStorm.storm_without_index(@empty_corpus, 140)
	    assert_equal @empty_corpus_result, actual
	  end

	  def test_with_small_corpus_without_index
	    tweets = TweetStorm.storm_without_index(@small_corpus, 140)
	    assert_equal @small_corpus_without_index_result, tweets.join("\n")
	  end

	  def test_with_large_corpus_without_index
	    tweets = TweetStorm.storm_without_index(@large_corpus, 140)
	    assert_equal @large_corpus_without_index_result, tweets.join("\n")
	  end

	  # --- storm ---------------------------------------------------------------

	  def test_with_empty_corpus
	    actual = TweetStorm.storm(@empty_corpus, 140)
	    assert_equal @empty_corpus_result, actual
	  end

	  def test_with_small_corpus
	    tweets = TweetStorm.storm(@small_corpus, 140)
	    assert_equal @small_corpus_result, tweets.join("\n")
	  end

	  def test_with_large_corpus
	    tweets = TweetStorm.storm(@large_corpus, 140)
	    assert_equal @large_corpus_result, tweets.join("\n")
	  end

	  private

	  # Reads a fixture file and removes its trailing newline so it can be
	  # compared against tweets joined with "\n".
	  def read_fixture(path)
	    File.read(path).chomp("\n")
	  end
	end
	#!/usr/bin/env ruby

	# Splits a long text into a numbered series of tweets (a "tweetstorm").
	#
	# Both entry points return "" for nil/empty text and otherwise an Array of
	# Strings, each prefixed with "<index>/<total> ".
	#
	# This copy previously contained markdown-escaped pipes ("\|"), which are
	# not valid Ruby; they are restored to plain "|" / "||" here.
	class TweetStorm
	  # Cuts text into chunks WITHOUT reserving room for the "index/count"
	  # prefix, so a prefixed tweet may be longer than bucket_size.
	  #
	  # Each chunk is up to bucket_size characters followed by one boundary:
	  # either a non-word character or the end of the text. Chunks are stripped
	  # of surrounding whitespace before the prefix is added.
	  #
	  # NOTE(review): a run longer than bucket_size that contains no non-word
	  # character cannot be matched from its start, so its leading part is
	  # skipped by the scan. "." does not match "\n", so a newline can only act
	  # as a boundary.
	  #
	  # @param text [String, nil] text to split
	  # @param bucket_size [Integer] maximum chunk length before the boundary
	  # @return [String, Array<String>] "" for nil/empty text, else the tweets
	  def self.storm_without_index(text, bucket_size)
	    return "" if text.nil? || text.empty?

	    # Use bucket_size instead of a hard-coded 140, and accept the end of the
	    # text (\z) as a boundary so a tail that does not end in punctuation or
	    # whitespace is no longer dropped.
	    chunk_pattern = /.{1,#{Integer(bucket_size)}}(?:\W|\z)/
	    tweets = text.scan(chunk_pattern).map(&:strip)
	    total = tweets.size
	    tweets.each_with_index.map { |tweet, index| "#{index + 1}/#{total} #{tweet}" }
	  end

	  # Groups whitespace-separated words into tweets while reserving an
	  # estimated amount of room for the "index/count" prefix.
	  #
	  # A single word longer than the available room still gets a tweet of its
	  # own; words are never split.
	  #
	  # @param text [String, nil] text to split
	  # @param bucket_size [Integer] target maximum tweet length
	  # @return [String, Array<String>] "" for nil/empty text, else the tweets
	  def self.storm(text, bucket_size)
	    return "" if text.nil? || text.empty?

	    padding = approximate_padding_size(text, bucket_size)
	    words = text.scan(/\S+/)

	    # Running totals for the slice being built. They live in local
	    # variables because Enumerable#slice_before no longer accepts an
	    # initial_state argument (passing one together with a block raises
	    # ArgumentError on current Rubies).
	    letters = 0 # characters of the words in the current slice
	    slots = 1   # words in the slice (+1 in the very first slice): room for spaces
	    slices = words.slice_before do |word|
	      slots += 1
	      letters += word.length
	      # Start a new slice before words + spaces + prefix reach bucket_size.
	      if letters + slots + padding >= bucket_size
	        slots = 1
	        letters = word.length # this word opens the new slice
	        true
	      else
	        false
	      end
	    end.to_a # materialise once: the closure state must not be replayed

	    total = slices.size
	    slices.each_with_index.map do |slice, index|
	      "#{index + 1}/#{total} #{slice.join(' ')}"
	    end
	  end

	  # Helpers. The original `private` keyword never applied to these
	  # `def self.` methods, so they remain callable from outside, as before.

	  # Estimates the number of tweets as twice the integer quotient
	  # text.size / bucket_size (the author's First-Fit rationale: no more than
	  # twice the optimal number of buckets).
	  #
	  # @return [Integer]
	  def self.approximate_tweet_count(text, bucket_size)
	    (text.size / bucket_size) * 2
	  end

	  # Room reserved for the "index/count " prefix: the digits of the estimated
	  # tweet count twice (index and count) plus 2 for "/" and the space,
	  # e.g. "1/150 test blablabla ...".
	  #
	  # @return [Integer]
	  def self.approximate_padding_size(text, bucket_size)
	    approximate_tweet_count(text, bucket_size).to_s.size * 2 + 2
	  end
	end


	#=== script =====

	# Command-line entry point: joins the arguments into one text and prints the
	# resulting tweets. Guarded so that merely loading this file (for example via
	# require_relative from the unit test) does not read ARGV or print anything.
	if __FILE__ == $PROGRAM_NAME
	  text = ARGV.join(' ')
	  result = TweetStorm.storm(text, 140)
	  puts "\n\n"
	  puts result
	end