Created
May 11, 2011 16:46
-
-
Save sukhchander/966846 to your computer and use it in GitHub Desktop.
Search Twitter for the latest keyword / hashtag.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding: UTF-8
require 'rubygems'

require 'set'
require 'uri'

require 'yajl'
require 'yajl/http_stream'
#####################################
# TwitterFeed
# represents the twitter feed / stream
# utilities to extract information from tweets (url extraction is currently supported)
#####################################
class TwitterFeed
  private

  # Group 1 captures the whole URL, group 2 only the scheme; scan therefore
  # yields both strings and the bare schemes are filtered out afterwards.
  URL_REGEX = /((http|https):\/\/\S+)/mixs

  public

  # Extract information of the given +type+ from a collection of tweet texts.
  # Only :url extraction is supported; any other type raises ArgumentError.
  def self.parse(enumerable = [], type = :url)
    case type
    when :url
      extract_urls enumerable
    else
      # fixed typo: the original raised the misspelled "ArugmentError",
      # which would itself have blown up with NameError at runtime
      raise ArgumentError, "INVALID EXTRACTION TYPE"
    end
  end

  private

  # Scan every tweet for URLs and return a sorted array of unique links.
  # Raises ArgumentError when the input is nil or empty.
  def self.extract_urls(tweets)
    raise ArgumentError, "EMPTY TWEETS. MAYBE NEED TO RETWEET." if tweets.nil? || tweets.empty?

    links = Set.new
    tweets.each { |tweet| links.merge(tweet.scan(URL_REGEX).flatten) }
    # reject, not reject!: the bang variant returns nil when nothing was
    # removed. Drops the bare scheme captures produced by regex group 2.
    links.reject { |url| url == "http" || url == "https" }.sort
  end
end
#####################################
# represents a simple wrapper around the twitter api
# utilities to query twitter (hashtag search is currently supported)
#####################################
class Twitter
  private

  TWEETS_RECENT     = 100   # default number of results requested per search
  TWEETS_PER_PAGE   = 15
  TWEETS_RECENT_MAX = 1500  # twitter's hard cap on recent-search results
  TWITTER_SEARCH_URL = "http://search.twitter.com/search.json?result_type=recent&rpp=#{TWEETS_RECENT}"
  # matches every character that must be percent-encoded in a query value
  URI_ESCAPE = Regexp.new("[^#{URI::PATTERN::UNRESERVED}]")

  public

  # Search twitter for +q+ and return an array of matching tweet texts.
  def self.search(q = '', num = TWEETS_RECENT)
    tweets(q, num)
  end

  private

  # Stream the JSON search results and collect the :text field of each hit.
  # Raises ArgumentError for a nil or blank query.
  def self.tweets(q, num)
    raise ArgumentError, "INVALID SEARCH QUERY" if q.nil? || q.empty?

    tweets = []
    Yajl::HttpStream.get(search_url(q, num), :symbolize_keys => true) do |response|
      results = response[:results]
      results.each { |result| tweets << result[:text] } unless results.empty?
    end
    tweets
  end

  # Build the search URI for the requested +type+ (:results or :page).
  def self.search_url(q, num, type = :results)
    case type
    when :results
      search_url_results q, num
    when :page
      search_url_page q
    else
      raise ArgumentError, "INVALID SEARCH URL TYPE"
    end
  end

  # Percent-encode +q+ for use as a query-string value.
  # Replacement for URI.escape, which was deprecated and removed in Ruby 3.0;
  # encodes each byte of every character matching URI_ESCAPE, as URI.escape did.
  def self.escape_query(q)
    q.chomp.gsub(URI_ESCAPE) { |chr| chr.bytes.map { |b| format('%%%02X', b) }.join }
  end

  # URI asking for +results+ recent tweets; guards against exceeding the API cap.
  def self.search_url_results(q, results = TWEETS_RECENT)
    raise ArgumentError, "INVALID RESULTS PER PAGE REQUESTED" if results >= TWEETS_RECENT_MAX
    URI.parse "#{TWITTER_SEARCH_URL}&q=#{escape_query(q)}"
  end

  # URI asking for a specific 1-based result +page+.
  def self.search_url_page(q, page = 1)
    raise ArgumentError, "INVALID PAGE NUMBER" if page < 1
    URI.parse "#{TWITTER_SEARCH_URL}&q=#{escape_query(q)}&page=#{page}"
  end
end
# twitter_search.rb
#
# perform a twitter search for a hashtag and extract links if necessary
# ie:
#   ruby twitter_search.rb <#hashtag>
if $0 == __FILE__
  RECENT_TWEETS = 100
  usage = "\nusage: ruby twitter_search.rb hashtag \n\teg: ruby twitter_search.rb '#puravida'"
  hashtag = ARGV[0]

  if hashtag.nil?
    puts usage
  elsif hashtag.empty? || hashtag == "#"
    puts "\nplease specify a valid hashtag. ie: #puravida\n"
  elsif hashtag.start_with?("#")
    begin
      puts "\nsearching twitter for tweets that contain #{hashtag}\n"
      tweets = Twitter.search hashtag, RECENT_TWEETS
      if tweets.empty?
        puts "\nno results for #{hashtag}. try another hashtag.\n"
      else
        urls = TwitterFeed.parse tweets
        puts "\n#{urls.size} unique urls found in the last #{RECENT_TWEETS} tweets that contain #{hashtag}\n"
        puts
        urls.each_with_index { |url, idx| puts "#{idx + 1}. #{url}" }
        puts
      end
    # rescue StandardError, never Exception: don't swallow SignalException,
    # SystemExit, etc.
    rescue StandardError => e
      puts e
    end
  else
    puts "\nonly hashtag searches are supported\n"
    puts usage
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.