Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
#!/usr/bin/env ruby
require 'rubygems'
gem 'hpricot', '>=0.6.170'
require 'open-uri'
require 'hpricot'
gem 'nokogiri', '>1.0.5'
require 'nokogiri'
require 'benchmark'
# Download the body at +url+ and return it as a String.
#
# Fetching a URL through plain Kernel#open relies on a hook installed by
# open-uri; that hook was deprecated in Ruby 2.7 and removed in Ruby 3.0.
# URI.open is used when the running Ruby provides it, and Kernel#open is kept
# as the fallback for old interpreters. The block form closes the underlying
# handle as soon as the body has been read.
fetch = lambda do |url|
  if URI.respond_to?(:open)
    URI.open(url) { |io| io.read }
  else
    open(url) { |io| io.read }
  end
end

# Both inputs are downloaded once, before any timed section runs.
xml_content = fetch.call("http://railstips.org/assets/2008/8/9/timeline.xml")
html_content = fetch.call("http://slashdot.org/")

# Number of iterations executed inside every benchmark report.
N = 500

# Documents parsed once up front; the XPath/CSS search benchmarks query these
# instead of re-parsing on each iteration.
hdoc_xml = Hpricot.XML(xml_content)
hdoc_xml2 = Hpricot.scan(xml_content)
hdoc_html = Hpricot(html_content)
# hdoc_html2 is only referenced by reports that are currently commented out.
hdoc_html2 = Hpricot.scan(html_content)
ndoc_compat = Nokogiri.Hpricot(xml_content)
ndoc_xml = Nokogiri::XML(xml_content)
ndoc_html = Nokogiri::HTML(html_content)
# XML Document Parsing
# Each report parses the same XML string N times through one parser entry
# point; 25 is the width reserved for the label column.
puts "XML Document parsing benchmark"
Benchmark.bm(25) do |bm|
  bm.report('hpricot:xml:doc')     { N.times { Hpricot.XML(xml_content) } }
  bm.report('hpricot2:xml:doc')    { N.times { Hpricot.scan(xml_content) } }
  bm.report('nokogiri:compat:doc') { N.times { Nokogiri.Hpricot(xml_content) } }
  bm.report('nokogiri:xml:doc')    { N.times { Nokogiri::XML(xml_content) } }
end
# XML XPath benchmarks
# Runs the same two XPath queries against each pre-parsed XML document and
# reads the text of the first match; the text itself is discarded.
xpath1 = '//status/text'
xpath2 = '//user/name'
puts
puts "XML XPath benchmarks (#{xpath1}, #{xpath2})"
Benchmark.bm(25) do |bm|
  [
    ['hpricot:xml:xpath',     hdoc_xml],
    ['hpricot2:xml:xpath',    hdoc_xml2],
    ['nokogiri:compat:xpath', ndoc_compat],
    ['nokogiri:xml:xpath',    ndoc_xml]
  ].each do |label, doc|
    bm.report(label) do
      N.times do
        doc.search(xpath1).first.inner_text
        doc.search(xpath2).first.inner_text
      end
    end
  end
end
# XML CSS benchmarks
# Same shape as an XPath run, but the documents are queried with CSS
# descendant selectors; the text of the first match is read and discarded.
css1 = 'status text'
css2 = 'user name'
puts
puts "XML CSS selector benchmarks (#{css1}, #{css2})"
Benchmark.bm(25) do |bm|
  [
    ['hpricot:xml:css',     hdoc_xml],
    ['hpricot2:xml:css',    hdoc_xml2],
    ['nokogiri:compat:css', ndoc_compat],
    ['nokogiri:xml:css',    ndoc_xml]
  ].each do |label, doc|
    bm.report(label) do
      N.times do
        doc.search(css1).first.inner_text
        doc.search(css2).first.inner_text
      end
    end
  end
end
# HTML Document Parsing
# Each report parses the downloaded HTML string N times through one parser
# entry point.
puts
puts "HTML Document parsing benchmark (slashdot.org)"
Benchmark.bm(25) do |bm|
  bm.report('hpricot:html:doc')  { N.times { Hpricot(html_content) } }
  bm.report('hpricot2:html:doc') { N.times { Hpricot.scan(html_content) } }
  bm.report('nokogiri:html:doc') { N.times { Nokogiri::HTML(html_content) } }
end
# HTML XPath benchmarks
# Runs one XPath query against each pre-parsed HTML document and reads the
# text of the first match; the text itself is discarded.
html_xpath = '//h1/a'
puts
puts "HTML XPath benchmarks (#{html_xpath})"
Benchmark.bm(25) do |bm|
  [
    ['hpricot:html:xpath', hdoc_html],
    # Fail -- left disabled, as the author marked this report as failing:
    # ['hpricot2:html:xpath', hdoc_html2],
    ['nokogiri:html:xpath', ndoc_html]
  ].each do |label, doc|
    bm.report(label) do
      N.times { doc.search(html_xpath).first.inner_text }
    end
  end
end
# EASY HTML CSS benchmarks
# Runs a short child-combinator selector against each pre-parsed HTML
# document and reads the text of the first match; the text is discarded.
html_css = 'h1 > a'
puts
puts "Easy HTML CSS benchmarks (#{html_css})"
Benchmark.bm(25) do |bm|
  [
    ['hpricot:html:css', hdoc_html],
    # Fail -- left disabled, as the author marked this report as failing:
    # ['hpricot2:html:css', hdoc_html2],
    ['nokogiri:html:css', ndoc_html]
  ].each do |label, doc|
    bm.report(label) do
      N.times { doc.search(html_css).first.inner_text }
    end
  end
end
# HARD HTML CSS benchmarks
# Runs a longer selector (id filter plus two child combinators) against each
# pre-parsed HTML document and reads the text of the first match; the text is
# discarded.
hard_html_css = 'div#logo > h1 > a'
puts
puts "Hard HTML CSS benchmarks (#{hard_html_css})"
Benchmark.bm(25) do |bm|
  [
    ['hpricot:html:css', hdoc_html],
    # Fail -- left disabled, as the author marked this report as failing:
    # ['hpricot2:html:css', hdoc_html2],
    ['nokogiri:html:css', ndoc_html]
  ].each do |label, doc|
    bm.report(label) do
      N.times { doc.search(hard_html_css).first.inner_text }
    end
  end
end
XML Document parsing benchmark
user system total real
hpricot:xml:doc 10.160000 0.950000 11.110000 ( 11.144462)
hpricot2:xml:doc 0.950000 0.000000 0.950000 ( 0.953266)
nokogiri:compat:doc 0.220000 0.020000 0.240000 ( 0.238401)
nokogiri:xml:doc 0.170000 0.030000 0.200000 ( 0.200283)
XML XPath benchmarks (//status/text, //user/name)
user system total real
hpricot:xml:xpath 7.580000 1.150000 8.730000 ( 8.728314)
hpricot2:xml:xpath 3.980000 0.530000 4.510000 ( 4.536297)
nokogiri:compat:xpath 0.100000 0.010000 0.110000 ( 0.103876)
nokogiri:xml:xpath 0.060000 0.000000 0.060000 ( 0.069590)
XML CSS selector benchmarks (status text, user name)
user system total real
hpricot:xml:css 7.650000 1.360000 9.010000 ( 9.035288)
hpricot2:xml:css 3.480000 0.660000 4.140000 ( 4.143033)
nokogiri:compat:css 0.100000 0.010000 0.110000 ( 0.106788)
nokogiri:xml:css 0.070000 0.000000 0.070000 ( 0.076784)
HTML Document parsing benchmark (slashdot.org)
user system total real
hpricot:html:doc 48.930000 3.640000 52.570000 ( 52.900035)
hpricot2:html:doc 4.500000 0.020000 4.520000 ( 4.518984)
nokogiri:html:doc 3.640000 0.130000 3.770000 ( 3.770642)
HTML XPath benchmarks (//h1/a)
user system total real
hpricot:html:xpath 8.250000 1.130000 9.380000 ( 9.468790)
nokogiri:html:xpath 0.060000 0.000000 0.060000 ( 0.063775)
Easy HTML CSS benchmarks (h1 > a)
user system total real
hpricot:html:css 6.780000 1.100000 7.880000 ( 8.137288)
nokogiri:html:css 0.070000 0.010000 0.080000 ( 0.135311)
Hard HTML CSS benchmarks (div#logo > h1 > a)
user system total real
hpricot:html:css 10.440000 1.740000 12.180000 ( 12.326511)
nokogiri:html:css 0.290000 0.000000 0.290000 ( 0.296762)
Use Nokogiri's XPath selectors for the fastest speed - CSS-based search is still faster than Hpricot, but not as fast as XPath.
Also take note that this benchmark only shows parsing of XML (not HTML).
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.