Last active
April 8, 2018 02:07
-
-
Save thdaraujo/bd7d0992ff343cc2644fc4cd3915b750 to your computer and use it in GitHub Desktop.
Tweet Storm generator: splits a long text into a numbered series of tweets of at most 140 characters each.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#unit test | |
#how to run: $ ruby tc_tweetstorm.rb | |
require_relative "tweetstorm" | |
require "minitest/autorun" | |
class TestTweetStorm < Minitest::Test | |
def setup | |
@empty_corpus = "" | |
@small_corpus = File.read("./corpus/small_corpus.txt").chomp("\n") | |
@large_corpus = File.read("./corpus/large_corpus.txt").chomp("\n") | |
@empty_corpus_result = "" | |
@small_corpus_result = File.read("./corpus/small_corpus_result.txt").chomp("\n") | |
@large_corpus_result = File.read("./corpus/large_corpus_result.txt").chomp("\n") | |
@small_corpus_without_index_result = File.read("./corpus/small_corpus_without_index_result.txt").chomp("\n") | |
@large_corpus_without_index_result = File.read("./corpus/large_corpus_without_index_result.txt").chomp("\n") | |
end | |
def test_with_empty_corpus_without_index | |
assert_equal @empty_corpus_result, TweetStorm.storm_without_index(@empty_corpus, 140) | |
end | |
def test_with_small_corpus_without_index | |
assert_equal @small_corpus_without_index_result, TweetStorm.storm_without_index(@small_corpus, 140).join("\n") | |
end | |
def test_with_large_corpus_without_index | |
assert_equal @large_corpus_without_index_result, TweetStorm.storm_without_index(@large_corpus, 140).join("\n") | |
end | |
def test_with_empty_corpus | |
assert_equal @empty_corpus_result, TweetStorm.storm(@empty_corpus, 140) | |
end | |
def test_with_small_corpus | |
assert_equal @small_corpus_result, TweetStorm.storm(@small_corpus, 140).join("\n") | |
end | |
def test_with_large_corpus | |
assert_equal @large_corpus_result, TweetStorm.storm(@large_corpus, 140).join("\n") | |
end | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
class TweetStorm | |
#divides text into chunks of 140 chars | |
#without padding the "index/count" at the start | |
# of the string. | |
def self.storm_without_index(text, bucket_size) | |
return "" if text.nil? || text.empty? | |
tweets = text.scan(/.{1,140}\W/) | |
.map(&:strip) | |
size = tweets.size | |
tweets.map.with_index { |tweet, index| | |
"#{index + 1}/#{size} " + tweet | |
} | |
end | |
#divides text into tweets of 140 chars | |
#reserving some space for the "index/count" | |
def self.storm(text, bucket_size) | |
return "" if text.nil? || text.empty? | |
padding = self.approximate_padding_size(text, bucket_size) | |
word_sizes = text.scan(/\S+/) | |
.map{|w| {len: w.length, word: w} } | |
slices = word_sizes.slice_before(sum: 0, count: 1) do |elem, state| | |
state[:count] += 1 | |
state[:sum] += elem[:len] | |
#slice the collection before words + spaces > 140 | |
if state[:sum] + state[:count] + padding >= bucket_size | |
state[:count] = 1 | |
state[:sum] = elem[:len] #last one = start of new slice | |
else | |
false | |
end | |
end | |
size = slices.count | |
slices.map.with_index { |slice, index| | |
tweet = slice.map{|word_size| word_size[:word] } | |
.join(' ') | |
"#{index + 1}/#{size} " << tweet | |
} | |
end | |
private | |
#First-Fit algorithm: the number of buckets | |
#is no more than twice the optimal number. | |
def self.approximate_tweet_count(text, bucket_size) | |
text_size = text.size | |
optimal_bucket_count = text_size / bucket_size | |
optimal_bucket_count * 2 | |
end | |
#We want to fit the index/count before the tweet | |
#so we need to reserve some room (padding) for that | |
#e.g.: 1/150 test blablabla ... | |
def self.approximate_padding_size(text, bucket_size) | |
approx_bucket_count = approximate_tweet_count(text, bucket_size) | |
#rough approximation of digits | |
approx_bucket_count.to_s.size * 2 + 2 | |
end | |
end | |
#=== script ===== | |
text = ARGV.join(' ') | |
result = TweetStorm.storm(text, 140) | |
puts "\n\n" | |
puts result |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment