Created
August 12, 2011 09:18
-
-
Save JamesHarrison/1141758 to your computer and use it in GitHub Desktop.
Twitter streaming API to Graphite bridge. Pushes pure volume stats and some other things. Just playing around.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require "rubygems" | |
require "bundler/setup" | |
require "tweetstream" | |
require "simple-graphite" | |
# Bridges a filtered Twitter stream to Graphite.
#
# Terms are registered with #add_term; #run then counts, per term and overall,
# how many tweets arrive, how many are retweets, contain an "http://" link,
# contain an "@" mention, or carry geo data, and writes those counters to
# Graphite from a 1-second periodic timer before zeroing them.
class TwitterStats
  def initialize
    @client = TweetStream::Client.new('username', 'password').on_limit do |disc_count|
      puts "WARNING: Got limited, lost #{disc_count.to_s}"
    end.on_error do |message|
      puts "ERROR: #{message}"
    end
    @g = Graphite.new({:host => "localhost", :port => 2003})
    @terms = []
  end

  # Registers a term to track. Must be called before #run.
  #
  # term - String passed to the stream's :track filter and matched
  #        (case-sensitively, as a substring) against each tweet's text.
  def add_term(term)
    @terms.push(term)
  end

  # Runs the stream client inside EventMachine.run.
  #
  # Every tweet delivered by the filter is logged and folded into the
  # counters; the periodic timer pushes the counters to Graphite and then
  # resets them, so each data point covers one timer interval.
  def run
    mutex = Mutex.new
    stats = fresh_stats

    EventMachine.run do
      # NOTE(review): the original had a stubbed-out language/follower filter
      # here (`if true`); every delivered tweet is counted.
      @client.filter(:track => @terms) do |tweet|
        log_tweet(tweet)
        # Totals and per-term counters are updated under the same lock so the
        # timer never observes (or resets) a half-applied tweet.
        mutex.synchronize { record(stats, tweet) }
      end

      EM.add_periodic_timer(1) do
        puts "Wiping counts to Graphite"
        mutex.synchronize do
          @g.push_to_graphite { |graphite| report(stats, graphite) }
          # Only reached when the push did not raise, matching the original
          # push-then-reset ordering.
          stats = fresh_stats
        end
      end
    end
  end

  private

  # Builds a zeroed counter set: one bucket per registered term plus totals.
  def fresh_stats
    per_term = {}
    @terms.each do |term|
      per_term[term] = {
        :count => 0, :retweets => 0, :links => 0, :mentions => 0, :space_ratio => 0
      }
    end
    {:terms => per_term, :total => 0, :retweets => 0, :geo => 0}
  end

  # Reads an optional attribute from a tweet, returning +fallback+ when the
  # tweet object does not provide it.
  def field(tweet, name, fallback)
    tweet.public_send(name)
  rescue NoMethodError
    fallback
  end

  # True/false for whether the tweet carries geo data; +fallback+ when the
  # tweet object has no geo attribute at all.
  def geo_present(tweet, fallback)
    !tweet.geo.nil?
  rescue NoMethodError
    fallback
  end

  # A tweet counts as a retweet when its text contains "RT " or its
  # +retweeted+ attribute is truthy.
  def retweet?(tweet)
    tweet.text.include?("RT ") || (field(tweet, :retweeted, false) ? true : false)
  end

  # Prints the one-line debug summary for a tweet.
  def log_tweet(tweet)
    lang = field(tweet, :lang, 'n/a')
    retweeted = field(tweet, :retweeted, 'n/a')
    geo = geo_present(tweet, 'n/a')
    rtc = field(tweet, :retweet_count, 'n/a')
    puts "Tweet- lang #{lang} retweet #{retweeted} geo #{geo} rtc #{rtc} #{tweet.text}"
  end

  # Folds one tweet into +stats+. The caller holds the mutex.
  def record(stats, tweet)
    text = tweet.text
    retweet = retweet?(tweet)
    has_link = text.include?("http://")
    has_mention = text.include?("@")
    spaces = text.count(" ")

    stats[:total] += 1
    stats[:retweets] += 1 if retweet
    stats[:geo] += 1 if geo_present(tweet, false)

    @terms.each do |term|
      next unless text.include?(term)
      bucket = stats[:terms][term]
      bucket[:count] += 1
      bucket[:retweets] += 1 if retweet
      bucket[:links] += 1 if has_link
      bucket[:mentions] += 1 if has_mention
      # Integer characters-per-space; tweets without spaces contribute 0.
      bucket[:space_ratio] += text.length / spaces if spaces > 0
    end
  end

  # Writes the current counters to +graphite+ (anything responding to #puts).
  # Per-term metrics are emitted for each term; the overall totals are emitted
  # exactly once per push rather than once per term.
  def report(stats, graphite)
    now = @g.time_now

    @terms.each do |term|
      bucket = stats[:terms][term]
      # "#" and spaces are rewritten so the term forms a usable metric path.
      safe_term = term.gsub("#", "hashtag.").gsub(" ", "_")
      prefix = "twitterstats.terms.#{safe_term}"
      tweets = bucket[:count]
      ratio = tweets.zero? ? 0 : bucket[:space_ratio] / tweets

      graphite.puts "#{prefix}.count #{tweets} #{now}"
      graphite.puts "#{prefix}.retweet_count #{bucket[:retweets]} #{now}"
      graphite.puts "#{prefix}.link_count #{bucket[:links]} #{now}"
      graphite.puts "#{prefix}.space_ratio #{ratio} #{now}"
      graphite.puts "#{prefix}.mention_count #{bucket[:mentions]} #{now}"
    end

    graphite.puts "twitterstats.total.count #{stats[:total]} #{now}"
    graphite.puts "twitterstats.total.retweets #{stats[:retweets]} #{now}"
    graphite.puts "twitterstats.total.geospatial #{stats[:geo]} #{now}"
  end
end
# Terms handed to the tracker, in registration order.
# NOTE(review): 'Milliband' may be a misspelling of 'Miliband' — confirm before
# changing, since it alters both the tracked term and the metric name.
tracked_terms = [
  'Cameron',
  'Clegg',
  'police',
  'UK',
  'riots',
  'Milliband',
  '#ukriots',
  '#londonriots',
  'post-riot',
  '#riotcleanup',
  'CCTV',
  'human rights',
  'Theresa May',
  'protest',
  'Tottenham',
  'Mark Duggan',
  'BBC',
  'raid',
  'raided'
]

# Register every term, then start streaming.
tracker = TwitterStats.new
tracked_terms.each { |term| tracker.add_term(term) }
tracker.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment