Skip to content

Instantly share code, notes, and snippets.

@jamiew
Created May 13, 2011 05:53
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jamiew/970048 to your computer and use it in GitHub Desktop.
Save jamiew/970048 to your computer and use it in GitHub Desktop.
Noodling with different ways of expanding short URLs in bulk
require 'rubygems'
require 'benchmark'
require 'pp'
require 'mechanize'
require 'eventmachine'
require 'em-http-request'
# Expands each URL in turn with a blocking Mechanize HEAD request.
#
# urls - Array of URL strings to request.
#
# Returns an Array with one entry per input URL: the response's URI as a
# String, or nil when the request raised (the error is reported on STDERR).
def syncronous(urls)
  agent = Mechanize.new
  urls.map do |url|
    begin
      agent.head(url).uri.to_s
    rescue => e
      STDERR.puts "Error fetching #{url.inspect} => #{e.inspect}"
      # Failed lookups are represented as nil so callers can compact them away.
      nil
    end
  end
end
# Expands URLs concurrently on the EventMachine reactor using EM::Iterator.
#
# urls        - Array of URL strings to request.
# concurrency - Number of requests EM::Iterator may run at once (default 10).
#
# Starts a reactor, issues a HEAD request (with :redirects => 1) for each URL
# and stops the reactor once every URL has reported a result.
#
# Returns the Array collected by EM::Iterator#map: for each URL the value of
# the response's LOCATION header, or nil when the header is missing or the
# request failed (failures are reported on STDERR).
def iterator(urls, concurrency = 10)
  expanded_urls = nil
  EventMachine.run do
    fetch = proc do |url, iter|
      http = EventMachine::HttpRequest.new(url).head(:redirects => 1)
      http.callback { iter.return(http.response_header['LOCATION']) }
      # Without an errback a failed request never reports back, so the
      # iteration never completes and the reactor is never stopped.
      http.errback do
        STDERR.puts "Error fetching #{url.inspect} => #{http.error.inspect}"
        iter.return(nil)
      end
    end

    finish = proc do |responses|
      expanded_urls = responses
      EventMachine.stop
    end

    EM::Iterator.new(urls, concurrency).map(fetch, finish)
  end
  expanded_urls
end
# Expands URLs by queueing every request on one EventMachine::MultiRequest.
#
# urls - Array of URL strings to request.
#
# Starts a reactor, adds a HEAD request (with :redirects => 1) for each URL
# and stops the reactor from the MultiRequest callback.
#
# Returns an Array holding the LOCATION header value of each request listed
# under responses[:succeeded]; other requests contribute no entry. Returns an
# empty Array when urls is empty.
def multi(urls)
  # With nothing queued the MultiRequest callback never fires, so the reactor
  # would never be stopped; answer directly instead.
  return [] if urls.empty?

  expanded_urls = nil
  EventMachine.run do
    batch = EventMachine::MultiRequest.new
    urls.each { |url| batch.add(EventMachine::HttpRequest.new(url).head(:redirects => 1)) }
    batch.callback do
      expanded_urls = batch.responses[:succeeded].map { |r| r.response_header['LOCATION'] }
      EventMachine.stop
    end
  end
  expanded_urls
end
# urls = ["http://owl.li/4TAMu", "http://nyti.ms/lXobRU", "http://youtu.be/ydbOwOpyF-o", "http://joint.im/beta/r/khPZl9sn4gT", "http://www.aim.com/av/", "http://nyti.ms/kMalpD", "http://nyti.ms/kushxM", "http://4sq.com/ls4SMM", "http://bzfd.it/lCFVTs", "http://29.media.tumblr.com/tumblr_ll3cdksMCl1qz6f9yo1_500.jpg", "http://bit.ly/mdmt3N", "http://bit.ly/juekXw", "http://nyti.ms/j4FEdf", "http://kck.st/l1PnlT", "http://j.mp/5epoi", "http://j.mp/kmrpev", "http://ttk.me/t4Bm5", "http://bit.ly/lTPQFN", "http://4sq.com/lFdbD1", "http://bit.ly/l6MXft", "http://nyti.ms/lSWIPU", "http://www.psfk.com/2011/05/the-quarterlife-crisis-young-insecure-and-depressed.html?sms_ss=twitter&at_xt=4dcc927f32bf1691,0", "http://su.pr/A1eCd9", "http://instagr.am/p/EK-_5/", "http://twitpic.com/4wv5oc", "http://bit.ly/lyeHQf", "http://bit.ly/jkOi8o", "http://4sq.com/mzo07c", "http://nyti.ms/iFlKgo", "http://wp.me/p4-Hw", "http://bzfd.it/llbwwg", "http://bit.ly/mv4h50", "http://bit.ly/j7RkNE", "http://mikufes.com", "http://mikufes.com/", "http://www.anime-expo.org/?p=6818", "http://bit.ly/k6hbm4", "http://nyti.ms/iLtXjX", "http://bit.ly/kHTq9e", "http://soupsoup.net/lTB4ym", "http://ttk.me/t4Bm1", "http://knowledge.wharton.upenn.edu/article.cfm?articleid=2755", "http://nyti.ms/jjTgpd", "http://bit.ly/kEW119", "http://su.pr/A6nk05", "http://instagr.am/p/ELE55/", "http://nyti.ms/kOseS2", "http://is.gd/fn8Vyz", "http://tcrn.ch/kx2nx0", "http://bit.ly/lR4bIZ", "http://bzfd.it/ka6KT7", "http://soupsoup.net/miMftM", "http://bit.ly/kHBc9P", "http://is.gd/ztnK1C", "http://youtu.be/qwvdxV26q8I", "http://ping.fm/yDzgX", "http://bit.ly/jvmnNp", "http://chzb.gr/rollercoasterchess", "http://bit.ly/hb3UbD", "http://bit.ly/lQ6WHv", "http://www.storyful.com/stories/gjdjks", "http://vhx.tv/73589", "http://econ.st/mJjaDX", "http://strawberrymoth.blogspot.com/2011/01/inspiring-style-files-presents.html", "http://twitpic.com/4wu6ll", "http://nyr.kr/lZDT7n", "http://nyti.ms/m1CfjD",
# "http://amarpai.com/bikemap/bikemap.html", "http://bit.ly/bjyRwD", "http://nyti.ms/io6orz", "http://nyti.ms/mtZNEn", "http://bit.ly/k9KYwL", "http://yfrog.com/h0xq7vqj", "http://hb.ly/mqh5Y9", "http://drbl.in/bkDf", "http://fncy.it/lEy1EK", "http://4sq.com/iRoVnp", "http://www.justin.tv/hatperson", "http://www.htmlfivewow.com/", "http://aim.com/av", "http://instagr.am/p/EK8mh/", "http://4sq.com/lLQRrp", "http://storify.com/hrheingold/students-selforganize-their-own-syllabus", "http://sfy.co/8Wm", "http://on.fb.me/kayJ2u", "http://frc.vc/3eN", "http://nyti.ms/ljFiEw", "http://bit.ly/kCjzKp", "http://twitpic.com/4wtowc", "http://nyti.ms/jViJ3s", "http://nyti.ms/iB1o3b", "http://bzfd.it/kTfJdD", "http://lockerz.com/s/101093774", "http://thepulsenetwork.com/technology/stevegarfield-tv-on-tpn/05-12-11-this-week-in-steve/", "http://thepulsenetwork.com/technology/stevegarfield-tv-on-tpn/05-12-11-video-soup/", "http://thepulsenetwork.com/technology/stevegarfield-tv-on-tpn/05-12-11-the-killing-of-bin-laden/"]
# Short URLs used as the workload for every strategy below.
urls = [
  "http://owl.li/4TAMu",
  "http://nyti.ms/lXobRU",
  "http://youtu.be/ydbOwOpyF-o",
  "http://joint.im/beta/r/khPZl9sn4gT",
  "http://www.aim.com/av/",
  "http://nyti.ms/kMalpD",
  "http://nyti.ms/kushxM",
  "http://4sq.com/ls4SMM",
  "http://bzfd.it/lCFVTs",
  "http://bit.ly/juekXw"
]

# Times one expansion strategy and reports on it.
#
# heading - String printed before the run starts.
# urls    - the Array of URLs being expanded (used for the per-URL average).
# block   - performs the expansion and returns its raw result Array.
#
# Prints the sorted, nil-free results and the elapsed time, pauses for two
# seconds, then returns the sorted results.
def time_expansion(heading, urls)
  puts heading
  expanded = []
  elapsed = Benchmark.realtime { expanded = yield.compact.sort }
  pp expanded
  per_url_ms = (elapsed * 1000.0 / urls.length.to_f).to_i
  puts "Took #{elapsed}s (#{per_url_ms}ms/url)"
  sleep 2
  expanded
end

puts "Processing #{urls.length} URLs..."

output0 = time_expansion("\nSyncronously...", urls) { syncronous(urls) }
output1 = time_expansion("\nEM::Iterator(2)...", urls) { iterator(urls, 2) }
output2 = time_expansion("\nEM::Iterator(10)...", urls) { iterator(urls, 10) }
output3 = time_expansion("\nEm::MultiRequest...", urls) { multi(urls) }

puts "\nDone!"
# Compare how many URLs each strategy resolved and whether the results agree.
puts "output0.length=#{output0.length} output1.length=#{output1.length} output2.length=#{output2.length} output3.length=#{output3.length}"
puts "0==1?#{output0 == output1} 1==2?#{output1 == output2} 2==3?#{output2 == output3}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment