Skip to content

Instantly share code, notes, and snippets.

@thdaraujo
Last active April 8, 2018 02:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save thdaraujo/bd7d0992ff343cc2644fc4cd3915b750 to your computer and use it in GitHub Desktop.
Save thdaraujo/bd7d0992ff343cc2644fc4cd3915b750 to your computer and use it in GitHub Desktop.
Tweet Storm generator: divides a giant text into small tweets of 140 characters.
#unit test
#how to run: $ ruby tc_tweetstorm.rb
require_relative "tweetstorm"
require "minitest/autorun"
# Unit tests for TweetStorm. Each case runs a corpus through one of the two
# splitters with a bucket size of 140 and compares the output against an
# expected result stored next to the corpus.
class TestTweetStorm < Minitest::Test
  # Loads the input corpora and their expected results before every test.
  def setup
    @empty_corpus = ""
    @empty_corpus_result = ""

    @small_corpus = load_fixture("./corpus/small_corpus.txt")
    @large_corpus = load_fixture("./corpus/large_corpus.txt")

    @small_corpus_result = load_fixture("./corpus/small_corpus_result.txt")
    @large_corpus_result = load_fixture("./corpus/large_corpus_result.txt")

    @small_corpus_without_index_result = load_fixture("./corpus/small_corpus_without_index_result.txt")
    @large_corpus_without_index_result = load_fixture("./corpus/large_corpus_without_index_result.txt")
  end

  # --- storm_without_index -------------------------------------------------

  # An empty corpus comes back as an empty string, not as a list of tweets.
  def test_with_empty_corpus_without_index
    actual = TweetStorm.storm_without_index(@empty_corpus, 140)
    assert_equal @empty_corpus_result, actual
  end

  def test_with_small_corpus_without_index
    actual = TweetStorm.storm_without_index(@small_corpus, 140)
    assert_equal @small_corpus_without_index_result, actual.join("\n")
  end

  def test_with_large_corpus_without_index
    actual = TweetStorm.storm_without_index(@large_corpus, 140)
    assert_equal @large_corpus_without_index_result, actual.join("\n")
  end

  # --- storm ---------------------------------------------------------------

  # An empty corpus comes back as an empty string, not as a list of tweets.
  def test_with_empty_corpus
    actual = TweetStorm.storm(@empty_corpus, 140)
    assert_equal @empty_corpus_result, actual
  end

  def test_with_small_corpus
    actual = TweetStorm.storm(@small_corpus, 140)
    assert_equal @small_corpus_result, actual.join("\n")
  end

  def test_with_large_corpus
    actual = TweetStorm.storm(@large_corpus, 140)
    assert_equal @large_corpus_result, actual.join("\n")
  end

  private

  # Reads a fixture file and drops a single trailing newline, so the content
  # can be compared with tweets joined by "\n".
  def load_fixture(path)
    File.read(path).chomp("\n")
  end
end
#!/usr/bin/env ruby
# Splits a long text into a numbered series of tweets (a "tweet storm").
#
# Both splitters return an Array of Strings shaped like
# "<index>/<total> <text>", or the empty String "" when the given text is
# nil or empty.
class TweetStorm
  # Divides text into chunks of about +bucket_size+ characters WITHOUT
  # reserving room for the "index/count" prefix. The prefix is still
  # prepended, so the resulting tweets can be longer than +bucket_size+.
  #
  # A chunk is up to +bucket_size+ characters followed by one non-word
  # delimiter (or the end of the text), so a chunk may hold one character more
  # than +bucket_size+ when the delimiter is punctuation rather than
  # whitespace. "." does not match newlines, so a chunk never spans two lines.
  #
  # @param text [String, nil] the text to split
  # @param bucket_size [Integer] maximum characters per chunk (before the prefix)
  # @return [Array<String>, String] numbered tweets, or "" for nil/empty text
  # @raise [ArgumentError] if +bucket_size+ is smaller than 1
  def self.storm_without_index(text, bucket_size)
    return "" if text.nil? || text.empty?

    limit = Integer(bucket_size)
    raise ArgumentError, "bucket_size must be at least 1" if limit < 1

    # `\z` lets the final chunk end at the end of the text; requiring `\W`
    # alone would silently drop a last word that has no trailing delimiter.
    chunk_pattern = /.{1,#{limit}}(?:\W|\z)/
    tweets = text.scan(chunk_pattern).map(&:strip)
    total = tweets.size
    tweets.map.with_index(1) { |tweet, number| "#{number}/#{total} #{tweet}" }
  end

  # Divides text into tweets on word boundaries, reserving some space in each
  # tweet for the "index/count" prefix.
  #
  # Runs of whitespace between words are collapsed to a single space.
  #
  # @param text [String, nil] the text to split
  # @param bucket_size [Integer] target maximum length of a tweet
  # @return [Array<String>, String] numbered tweets, or "" for nil/empty text
  def self.storm(text, bucket_size)
    return "" if text.nil? || text.empty?

    padding = approximate_padding_size(text, bucket_size)
    groups = pack_words(text.scan(/\S+/), bucket_size, padding)
    total = groups.size
    groups.map.with_index(1) do |words, number|
      "#{number}/#{total} #{words.join(' ')}"
    end
  end

  # NOTE: the two helpers below were written under a bare `private`, which has
  # no effect on `def self.` methods. They have therefore always been publicly
  # callable and are kept that way for backward compatibility; treat them as
  # internal.

  # Estimates how many tweets the text will need: twice the number of whole
  # buckets the raw text would fill. The original author justifies the factor
  # of two with the First-Fit bound (no more than twice the optimal number of
  # buckets).
  #
  # @param text [String]
  # @param bucket_size [Integer]
  # @return [Integer] 0 when the text is shorter than one bucket
  def self.approximate_tweet_count(text, bucket_size)
    optimal_bucket_count = text.size / bucket_size
    optimal_bucket_count * 2
  end

  # Estimates how many characters to reserve for the "index/count " prefix,
  # e.g. "1/150 test blablabla ...".
  #
  # @param text [String]
  # @param bucket_size [Integer]
  # @return [Integer]
  def self.approximate_padding_size(text, bucket_size)
    approx_bucket_count = approximate_tweet_count(text, bucket_size)
    # Digits for the index + digits for the total + "/" + the trailing space.
    approx_bucket_count.to_s.size * 2 + 2
  end

  # Greedily packs words into consecutive groups, one group per tweet.
  #
  # A new group starts before the word that makes
  # `letters + (words + 1) + padding` reach +bucket_size+. The running state
  # is kept in plain locals because the `slice_before(initial_state)` form
  # this replaces no longer exists in Ruby 3.
  #
  # @param words [Array<String>]
  # @param bucket_size [Integer]
  # @param padding [Integer] characters reserved for the "index/count " prefix
  # @return [Array<Array<String>>]
  def self.pack_words(words, bucket_size, padding)
    groups = []
    letters = 0
    slots = 1 # words in the current group + 1

    words.each do |word|
      slots += 1
      letters += word.length
      overflow = letters + slots + padding >= bucket_size
      if overflow
        # This word becomes the start of the next group.
        slots = 1
        letters = word.length
      end

      # The very first word always opens the first group, even if it alone
      # overflows the bucket.
      if overflow || groups.empty?
        groups << [word]
      else
        groups.last << word
      end
    end

    groups
  end
  private_class_method :pack_words
end
#=== script =====
# Command-line entry point: joins all arguments into one text, splits it into
# tweets of 140 characters and prints them after two blank lines.
# Guarded so that it only runs when this file is executed directly; loading
# the file as a library (e.g. from the unit tests) must not print anything.
if __FILE__ == $PROGRAM_NAME
  text = ARGV.join(' ')
  result = TweetStorm.storm(text, 140)
  puts "\n\n"
  puts result
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment