Created
April 21, 2014 00:39
-
-
Save chadbrewbaker/11129124 to your computer and use it in GitHub Desktop.
Twitter stream stats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'twitter'
require 'uri'
require 'emoji_data'

# NOTE(review): this variable is not referenced anywhere else in the visible
# script; kept in case something outside this view relies on it.
smile_emoji = "😄"
# Returns +a+ unless it is nil, in which case the fallback +stuff+ is returned.
#
# Only nil triggers the fallback; other falsy or "empty" values such as false
# or "" are passed through unchanged.
#
# @param a [Object, nil] value to check
# @param stuff [Object] fallback returned when +a+ is nil (defaults to "")
# @return [Object] +a+, or +stuff+ when +a+ is nil
def nilstuff(a, stuff = "")
  a.nil? ? stuff : a
end
# Go to https://apps.twitter.com and register yourself to get your API keys.
# The four credentials are read from environment variables so that secrets do
# not have to be written into this file; each one falls back to an empty
# string when its variable is unset.
client = Twitter::Streaming::Client.new do |config|
  config.consumer_key        = ENV.fetch("TWITTER_CONSUMER_KEY", "")
  config.consumer_secret     = ENV.fetch("TWITTER_CONSUMER_SECRET", "")
  config.access_token        = ENV.fetch("TWITTER_ACCESS_TOKEN", "")
  config.access_token_secret = ENV.fetch("TWITTER_ACCESS_TOKEN_SECRET", "")
end
# Running totals, updated by the streaming loop below.
tweet_counter = 0        # tweets received so far
emoji_histo = {}         # emoji character => count
hashtag_histo = {}       # hashtag text (without the leading '#') => count
uri_histo = {}           # host of each expanded URL => count
uri_tweet_counter = 0    # tweets that contained at least one URL
emoji_tweet_counter = 0  # tweets that contained at least one emoji
start_time = Time.now    # reference point for the tweets-per-second rate
# Formats the five highest-count entries of a histogram, largest first.
top_five = ->(histo) { histo.sort_by { |_key, count| count }.last(5).reverse.inspect }

# Expresses +count+ as a percentage of all tweets received so far.
percent_of_tweets = ->(count) { ((100.0 * count) / tweet_counter).to_s }

# Consume the sample stream, tallying emoji, hashtag and URL statistics for
# each tweet and printing a summary after every 100th tweet.
client.sample do |object|
  # Anything that is not a tweet is skipped entirely, so it can neither touch
  # the counters nor re-trigger the report while the tweet count sits on a
  # multiple of 100 (including 0, where the percentages would be NaN).
  next unless object.is_a?(Twitter::Tweet)

  tweet_counter += 1
  str = object.text

  # Each distinct emoji is counted at most once per tweet.
  matches = EmojiData::EMOJI_CHARS.select { |ec| str.include?(ec.char) }
  matches.each do |matched|
    emoji_histo[matched.char] = emoji_histo.fetch(matched.char, 0) + 1
  end
  emoji_tweet_counter += 1 unless matches.empty?

  # String#scan yields every hashtag in the text; Regexp#match would stop at
  # the first one and silently drop the rest.
  # Also builtin to Twitter gem: object.hashtags.each{|h| }
  str.scan(/#(\w+)/).flatten.each do |tag|
    hashtag_histo[tag] = hashtag_histo.fetch(tag, 0) + 1
  end

  has_uri = false
  object.uris.each do |entity|
    has_uri = true
    begin
      host = URI(entity.expanded_url).host
      uri_histo[host] = uri_histo.fetch(host, 0) + 1
    rescue StandardError
      # ignore malformed URIs
    end
  end
  uri_tweet_counter += 1 if has_uri

  # Report only when a tweet has just brought the total to a multiple of 100.
  next unless (tweet_counter % 100).zero?

  elapsed = Time.now - start_time
  puts "========================================"
  puts "Total number of tweets received: #{tweet_counter.inspect} tweets"
  # NOTE(review): the label mentions hour/minute/second but only the
  # per-second rate is printed; output kept as-is.
  puts "Average tweets per hour/minute/second: #{(tweet_counter / elapsed).inspect} tweets/sec"
  puts "Top emojis in tweets: " + top_five.call(emoji_histo)
  puts "Percent of tweets that contains emojis: " + percent_of_tweets.call(emoji_tweet_counter) + "%"
  puts "Top hashtags: " + top_five.call(hashtag_histo)
  puts "Percent of tweets that contain a url: " + percent_of_tweets.call(uri_tweet_counter) + "%"
  # NOTE(review): this divides URL occurrences (not tweets) by the tweet count
  # and only counts hosts that match these two strings exactly — confirm that
  # this is the intended definition of a "photo url".
  photo_urls = uri_histo.fetch("instagram.com", 0) + uri_histo.fetch("pic.twitter.com", 0)
  puts "Percent of tweets that contain a photo url (pic.twitter.com or instagram): " + percent_of_tweets.call(photo_urls) + "%"
  puts "Top domains of urls in tweets: " + top_five.call(uri_histo)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment