Skip to content

Instantly share code, notes, and snippets.

@careo
Created March 19, 2009 20:35
Show Gist options
  • Save careo/82040 to your computer and use it in GitHub Desktop.
Save careo/82040 to your computer and use it in GitHub Desktop.
require 'pp'
require 'uri'
require 'rubygems'
require 'json/ext'
require 'eventmachine'
require 'dnsruby'
require '/Users/djensen/Projects/em-http-request/lib/em-http'
require '/Users/djensen/Projects/Arachnid/lib/arachnid'
# Mix the Arachnid module into the top-level object so its names can be used
# unqualified below (presumably this is where Opml comes from -- confirm
# against the Arachnid library).
include Arachnid
# Configure Dnsruby before the resolver is created.
# NOTE(review): judging by their names, these calls route Dnsruby's I/O
# through EventMachine and, with `false`, leave starting the reactor to this
# script (see EventMachine.run below) -- confirm against the Dnsruby docs.
Dnsruby::Resolver.use_eventmachine
Dnsruby::Resolver.start_eventmachine_loop(false)
# Shared resolver; `resolve` sends its asynchronous lookups through it.
@res = Dnsruby::Resolver.new # use system defaults
# Build an Opml object from the subscriptions export; its feeds are read once
# the reactor is running. The path is relative, so it is resolved against the
# current working directory rather than this file's location.
opml = Opml.new(File.read("../data/google-reader-subscriptions.xml"))
# Fetch a feed. First fire off the async DNS request, then, once the host
# has been resolved, the async http GET. Every completed response is tallied
# in @responses under its HTTP status code.
#
# feed - the feed's URI; handed to `resolve` for the lookup and to `get`
#        alongside the resolved URI.
#
# Returns the value of the `resolve` call.
def fetch(feed)
  resolve(feed) do |uri|
    get(uri, feed) do |http|
      # Only the status code is recorded; headers and body are not used here.
      @responses[http.response_header.status] += 1
    end
  end
end
# Asynchronously look up the host of +uri+ and pass the block a copy of the
# URI whose host has been replaced by the address from the first A record.
#
# uri - object responding to #host and #clone; the original is not modified.
# blk - called with the rewritten copy when an A record comes back. It is
#       not called when the lookup fails, times out after 15 seconds, or the
#       answer holds no A record; those cases are reported on stdout.
#
# The lookup is kept in @resolving while it is in flight.
#
# Returns the deferrable handed back by @res.send_async.
def resolve(uri, &blk)
  resolved_uri = uri.clone
  df = @res.send_async(Dnsruby::Message.new(uri.host))
  @resolving.push(df)

  # Give up on lookups that take too long. The timer is set up before the
  # handlers so they can cancel it as soon as the lookup settles; otherwise
  # every finished lookup would keep a timer alive for the full 15 seconds.
  timeout = EventMachine.add_timer(15) do
    df.fail("timeout's a bitch")
  end

  df.callback do |msg|
    EventMachine.cancel_timer(timeout)
    @resolving.delete(df)
    record = msg.answer.find { |rr| rr.type == "A" }
    if record
      resolved_uri.host = record.rdata.to_s
      # do whatever it is we should do
      blk.call(resolved_uri)
    else
      # No A record in the answer: there is no address to connect to, so
      # report it instead of raising inside the reactor.
      puts "Sorry - can't resolve #{uri}. Message=#{msg} Error=no A record in answer"
    end
  end

  df.errback do |msg, err|
    EventMachine.cancel_timer(timeout)
    @resolving.delete(df)
    puts "Sorry - can't resolve #{uri}. Message=#{msg} Error=#{err}"
  end

  df
end
# Issue an asynchronous HTTP GET for +uri+, sending feed.host as the Host
# header. The request is kept in @gets until it finishes.
#
# uri  - URI to request (converted with #to_s).
# feed - object whose #host supplies the Host header; also named in the
#        failure message.
# blk  - called with the request object when the request succeeds.
#
# On failure @responses[:error] is incremented and a message is printed.
def get(uri, feed, &blk)
  client = EventMachine::HttpRequest.new(uri.to_s)
  request = client.get(:head => { :host => feed.host })
  @gets.push(request)

  request.callback do
    blk.call(request)
    @gets.delete(request)
  end

  request.errback do |msg, err|
    @gets.delete(request)
    @responses[:error] += 1
    puts "Sorry - can't get #{feed}. Msg=#{msg} Error=#{err}"
  end
end
# Ask for the platform's scalable event-notification backend. This has to be
# requested before the reactor starts; asking for it inside the run block is
# too late to take effect.
EM.kqueue if EM.kqueue?
EM.epoll if EM.epoll?

# Main loop: crawl every feed from the OPML file, printing progress once a
# second, and stop the reactor when nothing is left to do.
EventMachine.run do
  start = Time.now
  # Upper bound on DNS lookups in flight at once.
  max_resolving = 50

  @feeds = opml.feeds.map { |f| URI.parse(f.to_s) }
  @resolving = [] # DNS lookups in flight (maintained by `resolve`)
  @gets = []      # HTTP requests in flight (maintained by `get`)
  @responses = Hash.new(0) # tally keyed by HTTP status, plus :error

  # keep the resolution queue topped off if possible
  EventMachine.add_periodic_timer(0.1) do
    # `>=` rather than `==` so the throttle still holds if the queue ever
    # ends up above the limit.
    fetch(@feeds.pop) until @feeds.empty? || @resolving.size >= max_resolving
  end

  # Quit when we're done with all the feeds.
  EventMachine.add_periodic_timer(1) do
    p [@feeds.size, @resolving.size, @gets.size]
    p @responses
    p "runtime: #{Time.now - start}"
    EventMachine.stop if @feeds.empty? && @resolving.empty? && @gets.empty?
  end
end # EventMachine.run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment