Created
March 19, 2009 20:35
-
-
Save careo/82040 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'pp' | |
require 'uri' | |
require 'rubygems' | |
require 'json/ext' | |
require 'eventmachine' | |
require 'dnsruby' | |
require '/Users/djensen/Projects/em-http-request/lib/em-http' | |
require '/Users/djensen/Projects/Arachnid/lib/arachnid' | |
# Mix the Arachnid module into the top-level object so its names can be used
# unqualified below (Opml presumably comes from it -- TODO confirm).
include Arachnid

# Switch Dnsruby over to EventMachine. The `false` argument presumably tells
# Dnsruby not to start a reactor loop of its own, since this script calls
# EventMachine.run itself -- verify against the Dnsruby version in use.
Dnsruby::Resolver.use_eventmachine
Dnsruby::Resolver.start_eventmachine_loop(false)

# Shared resolver, built with no arguments (system defaults).
@res = Dnsruby::Resolver.new

# Read the exported subscription list; the path is relative to the current
# working directory, not to this file.
subscriptions_xml = File.read("../data/google-reader-subscriptions.xml")
opml = Opml.new(subscriptions_xml)
# Fetch a feed. First fire off the async DNS request, then the async HTTP GET
# against the resolved URI, and finally tally the response status code in
# @responses.
#
# feed - the feed's URI; handed to +resolve+ and then, together with the
#        resolved URI, to +get+.
#
# Returns whatever +resolve+ returns.
def fetch(feed)
  resolve(feed) do |uri|
    get(uri, feed) do |http|
      # Only the status code is recorded; headers and body are not needed.
      @responses[http.response_header.status] += 1
    end
  end
end
# Resolve the host of +uri+ asynchronously and pass the block a copy of the
# URI whose host has been replaced by the address of the first A record in
# the answer.
#
# uri - object responding to #host and #clone (the clone must accept #host=).
# blk - called with the rewritten URI once the lookup succeeds.
#
# The pending lookup is tracked in @resolving until it succeeds, fails, or is
# failed by the 15 second timeout. Failures are reported on stdout and the
# block is not called.
#
# Returns the deferrable obtained from @res.send_async.
def resolve(uri, &blk)
  resolved_uri = uri.clone
  df = @res.send_async(Dnsruby::Message.new(uri.host))
  @resolving.push(df)

  # Created before the handlers are attached so that both can cancel it;
  # otherwise every finished lookup leaves a live timer behind for 15 seconds.
  timeout = EventMachine.add_timer(15) { df.fail("timeout's a bitch") }

  df.callback do |msg|
    EventMachine.cancel_timer(timeout)
    @resolving.delete(df)
    a_record = msg.answer.find { |rr| rr.type == "A" }
    if a_record
      resolved_uri.host = a_record.rdata.to_s
      # do whatever it is we should do
      blk.call(resolved_uri)
    else
      # An answer without any A record would otherwise raise NoMethodError
      # on nil inside the reactor.
      puts "Sorry - can't resolve #{uri}. No A record in answer."
    end
  end

  df.errback do |msg, err|
    EventMachine.cancel_timer(timeout)
    @resolving.delete(df)
    puts "Sorry - can't resolve #{uri}. Message=#{msg} Error=#{err}"
  end

  df
end
# Issue an async HTTP GET for +uri+, sending feed.host as the Host header
# (fetch passes the DNS-resolved URI as +uri+ and the original feed URI as
# +feed+).
#
# uri  - URI to request; converted with #to_s.
# feed - the original feed URI; supplies the Host header and is used in the
#        error message.
# blk  - called with the request object when the request succeeds.
#
# The request is tracked in @gets while in flight. On failure
# @responses[:error] is incremented and a message is printed.
#
# Returns the request object, mirroring resolve, which returns its deferrable.
def get(uri, feed, &blk)
  http = EventMachine::HttpRequest.new(uri.to_s).get(:head => { :host => feed.host })
  @gets.push(http)

  http.callback do
    begin
      blk.call(http)
    ensure
      # Drop the request even if the block raises; a leaked entry would keep
      # @gets non-empty and the script's "all done" check could never pass.
      @gets.delete(http)
    end
  end

  http.errback do |msg, err|
    @gets.delete(http)
    @responses[:error] += 1
    puts "Sorry - can't get #{feed}. Msg=#{msg} Error=#{err}"
  end

  http
end
# Select the kqueue/epoll poller where available. This is done before
# EventMachine.run: the choice is read when the reactor is initialised, so
# requesting it from inside the running reactor has no effect.
EM.kqueue if EM.kqueue?
EM.epoll if EM.epoll?

# Main loop: parse the feed list, keep the DNS queue topped off, report
# progress once a second, and stop the reactor when nothing is left in flight.
EventMachine.run do
  start = Time.now

  @feeds = opml.feeds.map { |f| URI.parse(f.to_s) }
  @resolving = []          # DNS lookups in flight
  @gets = []               # HTTP requests in flight
  @responses = Hash.new(0) # tally keyed by status code (and :error)

  # keep the resolution queue topped off if possible
  EventMachine.add_periodic_timer(0.1) do
    # `>=` rather than `==` so the cap still holds if the queue ever
    # overshoots 50.
    fetch(@feeds.pop) until @feeds.empty? || @resolving.size >= 50
  end

  # Quit when we're done with all the feeds.
  EventMachine.add_periodic_timer(1) do
    p [@feeds.size, @resolving.size, @gets.size]
    p @responses
    p "runtime: #{Time.now - start}"
    EventMachine.stop if @feeds.empty? && @resolving.empty? && @gets.empty?
  end
end # EventMachine.run
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment