Skip to content

Instantly share code, notes, and snippets.

@sukhchander
Created May 11, 2011 16:46
Show Gist options
  • Save sukhchander/966846 to your computer and use it in GitHub Desktop.
Save sukhchander/966846 to your computer and use it in GitHub Desktop.
search twitter for the latest keyword / hashtag
# encoding: UTF-8
require 'rubygems'
require 'set'
require 'uri'
require 'yajl'
require 'yajl/http_stream'
#####################################
# TwitterFeed
# Represents the twitter feed / stream.
# Utilities to extract information from tweets (only URL extraction is
# currently supported).
#####################################
class TwitterFeed
  # Matches an http:// or https:// URL up to the next whitespace character.
  #
  # The scheme group is non-capturing so that String#scan yields whole URLs
  # only (a capturing group would also yield the bare scheme). The /s flag is
  # deliberately absent: in Ruby it pins the regexp to the Windows-31J
  # encoding, which makes matching raise Encoding::CompatibilityError on
  # UTF-8 tweets that contain non-ASCII characters.
  URL_REGEX = %r{(?:http|https)://\S+}i

  # NOTE: a bare `private` keyword never applies to `def self.` methods, so
  # extract_urls has always been callable from outside this class. It is left
  # callable here to stay backward-compatible.

  # Extracts information of the requested type from a collection of tweets.
  #
  # @param enumerable [Enumerable<String>] tweet texts to inspect
  # @param type [Symbol] kind of data to extract; only :url is supported
  # @return [Array<String>] sorted, de-duplicated URLs
  # @raise [ArgumentError] if type is unsupported, or the collection is nil
  #   or empty
  def self.parse(enumerable = [], type = :url)
    case type
    when :url
      extract_urls(enumerable)
    else
      raise ArgumentError, "INVALID EXTRACTION TYPE"
    end
  end

  # Collects every URL found in the given tweets.
  #
  # @param tweets [Enumerable<String>] tweet texts to scan
  # @return [Array<String>] unique URLs in ascending order; empty when no
  #   tweet contains a URL
  # @raise [ArgumentError] if tweets is nil or empty
  def self.extract_urls(tweets)
    raise ArgumentError, "EMPTY TWEETS. MAYBE NEED TO RETWEET." if tweets.nil? || tweets.size.zero?

    # to_s guards against a nil tweet text sneaking into the collection.
    tweets.flat_map { |tweet| tweet.to_s.scan(URL_REGEX) }.uniq.sort
  end
end
#####################################
# Twitter
# A simple wrapper around the twitter search api.
# Utilities to query twitter (only hashtag search is currently supported).
#
# NOTE(review): the search.twitter.com JSON endpoint used below has presumably
# been retired by Twitter -- confirm before relying on live results.
#####################################
class Twitter
  TWEETS_RECENT = 100
  TWEETS_PER_PAGE = 15
  TWEETS_RECENT_MAX = 1500
  # Search endpoint without a results-per-page value, so callers can pick one.
  TWITTER_SEARCH_BASE_URL = "http://search.twitter.com/search.json?result_type=recent"
  # Search endpoint with the default results-per-page value baked in.
  TWITTER_SEARCH_URL = "#{TWITTER_SEARCH_BASE_URL}&rpp=#{TWEETS_RECENT}"
  # Every character outside the RFC 2396 "unreserved" set. Written as a
  # literal instead of being derived from URI::PATTERN, which is legacy API.
  URI_ESCAPE = /[^\-_.!~*'()a-zA-Z\d]/

  # NOTE: a bare `private` keyword never applies to `def self.` methods, so
  # the helpers below have always been callable from outside this class. They
  # are left callable here to stay backward-compatible.

  # Searches twitter for recent tweets matching the query.
  #
  # @param q [String] search query, e.g. a hashtag
  # @param num [Integer] number of results to request per page
  # @return [Array<String>] text of each tweet in the response
  # @raise [ArgumentError] if the query is nil/empty or num is out of range
  def self.search(q = '', num = TWEETS_RECENT)
    tweets(q, num)
  end

  # Fetches the search results and collects the text of every tweet.
  #
  # @param q [String] search query
  # @param num [Integer] number of results to request per page
  # @return [Array<String>] tweet texts, empty when nothing matched
  # @raise [ArgumentError] if the query is nil or empty
  def self.tweets(q, num)
    raise ArgumentError, "INVALID SEARCH QUERY" if q.nil? || q.eql?('')

    texts = []
    Yajl::HttpStream.get(search_url(q, num), :symbolize_keys => true) do |response|
      # A response without a :results key is treated as "no results".
      Array(response[:results]).each { |result| texts << result[:text] }
    end
    texts
  end

  # Builds the search URL of the requested flavour.
  #
  # @param q [String] search query
  # @param num [Integer] results per page (used by the :results flavour)
  # @param type [Symbol] :results or :page
  # @return [URI] the URL to query
  # @raise [ArgumentError] if type is neither :results nor :page
  def self.search_url(q, num, type = :results)
    case type
    when :results
      search_url_results(q, num)
    when :page
      search_url_page(q)
    else
      raise ArgumentError, "INVALID SEARCH URL TYPE"
    end
  end

  # Builds a search URL that asks for `results` tweets per page.
  #
  # @param q [String] search query
  # @param results [Integer] results per page, from 1 up to (but excluding)
  #   TWEETS_RECENT_MAX
  # @return [URI] the URL to query
  # @raise [ArgumentError] if results is outside the accepted range
  def self.search_url_results(q, results = TWEETS_RECENT)
    raise ArgumentError, "INVALID RESULTS PER PAGE REQUESTED" if results < 1 || results >= TWEETS_RECENT_MAX

    # The requested count is written into the URL; it used to be validated
    # and then ignored in favour of the hard-coded default.
    URI.parse("#{TWITTER_SEARCH_BASE_URL}&rpp=#{results}&q=#{escape_query(q)}")
  end

  # Builds a search URL for a specific page of results.
  #
  # @param q [String] search query
  # @param page [Integer] 1-based page number
  # @return [URI] the URL to query
  # @raise [ArgumentError] if page is less than 1
  def self.search_url_page(q, page = 1)
    raise ArgumentError, "INVALID PAGE NUMBER" if page < 1

    URI.parse("#{TWITTER_SEARCH_URL}&q=#{escape_query(q)}&page=#{page}")
  end

  # Percent-encodes every byte of every character matched by URI_ESCAPE,
  # after dropping a trailing newline from the query. Stands in for
  # URI.escape, which no longer exists in Ruby 3.0 and later.
  #
  # @param q [String] raw search query
  # @return [String] query that is safe to embed in a URL
  def self.escape_query(q)
    q.chomp.gsub(URI_ESCAPE) do |char|
      char.bytes.map { |byte| format('%%%02X', byte) }.join
    end
  end
end
# twitter_search.rb
#
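# Performs a twitter search for a hashtag and prints the unique links found
# in the matching tweets.
# e.g. (quote the hashtag so the shell does not treat it as a comment):
# ruby twitter_search.rb '#hashtag'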
# Command-line entry point: searches twitter for the hashtag given as the
# first argument and prints the unique URLs found in the matching tweets.
if $0 == __FILE__
  RECENT_TWEETS = 100
  usage = "\nusage: ruby twitter_search.rb hashtag \n\teg: ruby twitter_search.rb '#puravida'"
  hashtag = ARGV[0]

  begin
    if hashtag.nil?
      puts usage
    elsif hashtag.empty? || hashtag.eql?("#")
      puts "\nplease specify a valid hashtag. ie: #puravida\n"
    elsif hashtag.start_with?("#")
      # start_with? checks the first character of the argument itself; the
      # former /^\#/ test also matched a '#' after an embedded newline.
      puts "\nsearching twitter for tweets that contain #{hashtag}\n"
      twitter_search = Twitter.search(hashtag, RECENT_TWEETS)
      if twitter_search.empty?
        puts "\nno results for #{hashtag}. try another hashtag.\n"
      else
        twitter_search_urls = TwitterFeed.parse(twitter_search)
        puts "\n#{twitter_search_urls.size} unique urls found in the last #{RECENT_TWEETS} tweets that contain #{hashtag}\n"
        puts
        twitter_search_urls.each_with_index { |item, idx| puts "#{idx + 1}. #{item}" }
        puts
      end
    else
      puts "\nonly hashtag searches are supported\n"
      puts usage
    end
  rescue StandardError => e
    # StandardError only: Interrupt (Ctrl-C) and SystemExit must not be
    # swallowed. Failures go to stderr with a non-zero exit status so callers
    # can tell them apart from a successful run.
    warn e.message
    exit 1
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment