Created
May 11, 2011 16:46
-
-
Save sukhchander/966846 to your computer and use it in GitHub Desktop.
Search Twitter for the latest keyword / hashtag.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: UTF-8
require 'rubygems'

require 'set'
require 'uri'

require 'yajl'
require 'yajl/http_stream'
#####################################
# TwitterFeed
# represents the twitter feed / stream
# utilities to extract information from tweets (url extraction is currently supported)
#####################################
class TwitterFeed
  private

  # Group 1 captures the whole URL, group 2 only the scheme; scan therefore
  # yields both strings and the bare schemes are filtered out afterwards.
  URL_REGEX = /((http|https):\/\/\S+)/mixs

  public

  # Extract information of the given +type+ from a collection of tweet texts.
  # Only :url extraction is supported; any other type raises ArgumentError.
  def self.parse(enumerable = [], type = :url)
    case type
    when :url
      extract_urls enumerable
    else
      # fixed typo: the original raised the misspelled "ArugmentError",
      # which would itself have blown up with NameError at runtime
      raise ArgumentError, "INVALID EXTRACTION TYPE"
    end
  end

  private

  # Scan every tweet for URLs and return a sorted array of unique links.
  # Raises ArgumentError when the input is nil or empty.
  def self.extract_urls(tweets)
    raise ArgumentError, "EMPTY TWEETS. MAYBE NEED TO RETWEET." if tweets.nil? || tweets.empty?

    links = Set.new
    tweets.each { |tweet| links.merge(tweet.scan(URL_REGEX).flatten) }
    # reject, not reject!: the bang variant returns nil when nothing was
    # removed. Drops the bare scheme captures produced by regex group 2.
    links.reject { |url| url == "http" || url == "https" }.sort
  end
end
#####################################
# represents a simple wrapper around the twitter api
# utilities to query twitter (hashtag search is currently supported)
#####################################
class Twitter
  private

  TWEETS_RECENT     = 100   # default number of results requested per search
  TWEETS_PER_PAGE   = 15
  TWEETS_RECENT_MAX = 1500  # twitter's hard cap on recent-search results
  TWITTER_SEARCH_URL = "http://search.twitter.com/search.json?result_type=recent&rpp=#{TWEETS_RECENT}"
  # matches every character that must be percent-encoded in a query value
  URI_ESCAPE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}]")

  public

  # Search twitter for +q+ and return an array of matching tweet texts.
  def self.search(q = '', num = TWEETS_RECENT)
    tweets(q, num)
  end

  private

  # Stream the JSON search results and collect the :text field of each hit.
  # Raises ArgumentError for a nil or blank query.
  def self.tweets(q, num)
    raise ArgumentError, "INVALID SEARCH QUERY" if q.nil? || q.empty?

    tweets = []
    Yajl::HttpStream.get(search_url(q, num), :symbolize_keys => true) do |response|
      results = response[:results]
      results.each { |result| tweets << result[:text] } unless results.empty?
    end
    tweets
  end

  # Build the search URI for the requested +type+ (:results or :page).
  def self.search_url(q, num, type = :results)
    case type
    when :results
      search_url_results q, num
    when :page
      search_url_page q
    else
      raise ArgumentError, "INVALID SEARCH URL TYPE"
    end
  end

  # Percent-encode +q+ for use as a query-string value.
  # Replacement for URI.escape, which was deprecated and removed in Ruby 3.0;
  # encodes each byte of every character matching URI_ESCAPE, as URI.escape did.
  def self.escape_query(q)
    q.chomp.gsub(URI_ESCAPE) { |chr| chr.bytes.map { |b| format('%%%02X', b) }.join }
  end

  # URI asking for +results+ recent tweets; guards against exceeding the API cap.
  def self.search_url_results(q, results = TWEETS_RECENT)
    raise ArgumentError, "INVALID RESULTS PER PAGE REQUESTED" if results >= TWEETS_RECENT_MAX
    URI.parse "#{TWITTER_SEARCH_URL}&q=#{escape_query(q)}"
  end

  # URI asking for a specific 1-based result +page+.
  def self.search_url_page(q, page = 1)
    raise ArgumentError, "INVALID PAGE NUMBER" if page < 1
    URI.parse "#{TWITTER_SEARCH_URL}&q=#{escape_query(q)}&page=#{page}"
  end
end
# twitter_search.rb
#
# perform a twitter search for a hashtag and extract links if necessary
# ie:
#   ruby twitter_search.rb <#hashtag>
if $0 == __FILE__
  RECENT_TWEETS = 100
  usage = "\nusage: ruby twitter_search.rb hashtag \n\teg: ruby twitter_search.rb '#puravida'"
  hashtag = ARGV[0]

  if hashtag.nil?
    puts usage
  elsif hashtag.empty? || hashtag == "#"
    puts "\nplease specify a valid hashtag. ie: #puravida\n"
  elsif hashtag.start_with?("#")
    begin
      puts "\nsearching twitter for tweets that contain #{hashtag}\n"
      tweets = Twitter.search hashtag, RECENT_TWEETS
      if tweets.empty?
        puts "\nno results for #{hashtag}. try another hashtag.\n"
      else
        urls = TwitterFeed.parse tweets
        puts "\n#{urls.size} unique urls found in the last #{RECENT_TWEETS} tweets that contain #{hashtag}\n"
        puts
        urls.each_with_index { |url, idx| puts "#{idx + 1}. #{url}" }
        puts
      end
    # rescue StandardError, never Exception: don't swallow SignalException,
    # SystemExit, etc.
    rescue StandardError => e
      puts e
    end
  else
    puts "\nonly hashtag searches are supported\n"
    puts usage
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.