#!/usr/bin/env ruby
#
# Benchmarks Hpricot against Nokogiri on one XML document and one HTML page:
# document parsing, XPath searches and CSS-selector searches. Both inputs are
# downloaded once up front, so the script needs network access; the downloads
# themselves are not part of any timed section.
#
# Every timed block runs N times. The search benchmarks call
# `.first.inner_text` on the result, so a selector that matches nothing raises
# NoMethodError (nil has no #inner_text) instead of reporting a time.

require 'rubygems'
gem 'hpricot', '>=0.6.170'
require 'open-uri'
require 'hpricot'
gem 'nokogiri', '>1.0.5'
require 'nokogiri'
require 'benchmark'

# Number of iterations inside every timed block.
N = 500

# Downloads +url+ and returns the response body as a String.
#
# Uses the #read method that open-uri adds to HTTP URIs rather than
# Kernel#open: Kernel#open stopped opening URLs in Ruby 3.0, while this form
# works on old and new interpreters alike.
def fetch_body(url)
  URI.parse(url).read
end

# Adds a report line named +label+ to the Benchmark report +x+ that times
# N executions of the given block.
def report_n(x, label)
  x.report(label) { N.times { yield } }
end

# Adds a report line named +label+ to +x+ that times, N times over, searching
# +doc+ with each of +selectors+ and reading the text of the first match.
def report_first_text(x, label, doc, *selectors)
  report_n(x, label) do
    selectors.each { |selector| doc.search(selector).first.inner_text }
  end
end

xml_content  = fetch_body("http://railstips.org/assets/2008/8/9/timeline.xml")
html_content = fetch_body("http://slashdot.org/")

# Documents are parsed once here so the search benchmarks measure searching
# only, not parsing.
hdoc_xml    = Hpricot.XML(xml_content)
hdoc_xml2   = Hpricot.scan(xml_content)
hdoc_html   = Hpricot(html_content)
# Only referenced by the disabled 'hpricot2:html' entry further down.
hdoc_html2  = Hpricot.scan(html_content)
ndoc_compat = Nokogiri.Hpricot(xml_content)
ndoc_xml    = Nokogiri::XML(xml_content)
ndoc_html   = Nokogiri::HTML(html_content)

# [label prefix, parsed document] pairs shared by the search benchmarks.
xml_docs = [
  ['hpricot:xml',     hdoc_xml],
  ['hpricot2:xml',    hdoc_xml2],
  ['nokogiri:compat', ndoc_compat],
  ['nokogiri:xml',    ndoc_xml]
]

html_docs = [
  ['hpricot:html',  hdoc_html],
  # Marked "Fail" in the original script, so left disabled:
  # ['hpricot2:html', hdoc_html2],
  ['nokogiri:html', ndoc_html]
]

# XML Document Parsing
puts "XML Document parsing benchmark"
Benchmark.bm(25) do |x|
  report_n(x, 'hpricot:xml:doc')     { Hpricot.XML(xml_content) }
  report_n(x, 'hpricot2:xml:doc')    { Hpricot.scan(xml_content) }
  report_n(x, 'nokogiri:compat:doc') { Nokogiri.Hpricot(xml_content) }
  report_n(x, 'nokogiri:xml:doc')    { Nokogiri::XML(xml_content) }
end

# XML XPath benchmarks
xpath1 = '//status/text'
xpath2 = '//user/name'
puts
puts "XML XPath benchmarks (#{xpath1}, #{xpath2})"
Benchmark.bm(25) do |x|
  xml_docs.each do |prefix, doc|
    report_first_text(x, "#{prefix}:xpath", doc, xpath1, xpath2)
  end
end

# XML CSS benchmarks
css1 = 'status text'
css2 = 'user name'
puts
puts "XML CSS selector benchmarks (#{css1}, #{css2})"
Benchmark.bm(25) do |x|
  xml_docs.each do |prefix, doc|
    report_first_text(x, "#{prefix}:css", doc, css1, css2)
  end
end

# HTML Document Parsing
puts
puts "HTML Document parsing benchmark (slashdot.org)"
Benchmark.bm(25) do |x|
  report_n(x, 'hpricot:html:doc')  { Hpricot(html_content) }
  report_n(x, 'hpricot2:html:doc') { Hpricot.scan(html_content) }
  report_n(x, 'nokogiri:html:doc') { Nokogiri::HTML(html_content) }
end

# HTML XPath benchmarks
html_xpath = '//h1/a'
puts
puts "HTML XPath benchmarks (#{html_xpath})"
Benchmark.bm(25) do |x|
  html_docs.each do |prefix, doc|
    report_first_text(x, "#{prefix}:xpath", doc, html_xpath)
  end
end

# EASY HTML CSS benchmarks
html_css = 'h1 > a'
puts
puts "Easy HTML CSS benchmarks (#{html_css})"
Benchmark.bm(25) do |x|
  html_docs.each do |prefix, doc|
    report_first_text(x, "#{prefix}:css", doc, html_css)
  end
end

# HARD HTML CSS benchmarks
hard_html_css = 'div#logo > h1 > a'
puts
puts "Hard HTML CSS benchmarks (#{hard_html_css})"
Benchmark.bm(25) do |x|
  html_docs.each do |prefix, doc|
    report_first_text(x, "#{prefix}:css", doc, hard_html_css)
  end
end