-
-
Save flavorjones/24605 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Benchmark setup: loads Hpricot and Nokogiri, downloads one XML and one HTML
# sample document, and parses each of them once with every parser under test.
# The pre-parsed documents are reused by the query (XPath/CSS) benchmarks so
# those measure searching only, not parsing.
require 'rubygems'
gem 'hpricot', '>=0.6.170'
require 'open-uri'
require 'hpricot'
gem 'nokogiri', '>1.0.5'
require 'nokogiri'
require 'benchmark'

# Download through URI#read (mixed into HTTP URIs by open-uri) instead of
# Kernel#open: open-uri's Kernel#open override for URLs was removed in
# Ruby 3.0, while URI#read works on both old and current Ruby versions.
xml_content = URI.parse("http://railstips.org/assets/2008/8/9/timeline.xml").read
html_content = URI.parse("http://slashdot.org/").read

# Number of iterations executed inside every benchmark report.
N = 500

# Pre-parsed XML documents, one per parser entry point.
hdoc_xml = Hpricot.XML(xml_content)
hdoc_xml2 = Hpricot.scan(xml_content)

# Pre-parsed HTML documents. hdoc_html2 is only referenced by benchmark
# cases that are currently commented out.
hdoc_html = Hpricot(html_content)
hdoc_html2 = Hpricot.scan(html_content)

# Nokogiri documents: Hpricot-compatibility mode, native XML, native HTML.
ndoc_compat = Nokogiri.Hpricot(xml_content)
ndoc_xml = Nokogiri::XML(xml_content)
ndoc_html = Nokogiri::HTML(html_content)
# XML document parsing: every report parses the same XML string N times
# with one parser entry point.
puts "XML Document parsing benchmark"
Benchmark.bm(25) do |x|
  x.report('hpricot:xml:doc')     { N.times { Hpricot.XML(xml_content) } }
  x.report('hpricot2:xml:doc')    { N.times { Hpricot.scan(xml_content) } }
  x.report('nokogiri:compat:doc') { N.times { Nokogiri.Hpricot(xml_content) } }
  x.report('nokogiri:xml:doc')    { N.times { Nokogiri::XML(xml_content) } }
end
# XML XPath benchmarks: runs two XPath queries N times against each
# pre-parsed XML document and reads the text of the first match.
xpath1 = '//status/text'
xpath2 = '//user/name'
puts
puts "XML XPath benchmarks (#{xpath1}, #{xpath2})"
Benchmark.bm(25) do |x|
  # Ordered label/document pairs; an array (not a hash) keeps the report
  # order stable on every Ruby version.
  [
    ['hpricot:xml:xpath',     hdoc_xml],
    ['hpricot2:xml:xpath',    hdoc_xml2],
    ['nokogiri:compat:xpath', ndoc_compat],
    ['nokogiri:xml:xpath',    ndoc_xml]
  ].each do |label, doc|
    x.report(label) do
      N.times do
        # Results are intentionally discarded; only the lookup time matters.
        doc.search(xpath1).first.inner_text
        doc.search(xpath2).first.inner_text
      end
    end
  end
end
# XML CSS benchmarks: runs two CSS selectors N times against each
# pre-parsed XML document and reads the text of the first match.
css1 = 'status text'
css2 = 'user name'
puts
puts "XML CSS selector benchmarks (#{css1}, #{css2})"
Benchmark.bm(25) do |x|
  # Ordered label/document pairs; an array (not a hash) keeps the report
  # order stable on every Ruby version.
  [
    ['hpricot:xml:css',     hdoc_xml],
    ['hpricot2:xml:css',    hdoc_xml2],
    ['nokogiri:compat:css', ndoc_compat],
    ['nokogiri:xml:css',    ndoc_xml]
  ].each do |label, doc|
    x.report(label) do
      N.times do
        # Results are intentionally discarded; only the lookup time matters.
        doc.search(css1).first.inner_text
        doc.search(css2).first.inner_text
      end
    end
  end
end
# HTML document parsing: every report parses the same HTML string N times
# with one parser entry point.
puts
puts "HTML Document parsing benchmark (slashdot.org)"
Benchmark.bm(25) do |x|
  x.report('hpricot:html:doc')  { N.times { Hpricot(html_content) } }
  x.report('hpricot2:html:doc') { N.times { Hpricot.scan(html_content) } }
  x.report('nokogiri:html:doc') { N.times { Nokogiri::HTML(html_content) } }
end
# HTML XPath benchmarks: runs one XPath query N times against each
# pre-parsed HTML document and reads the text of the first match.
html_xpath = '//h1/a'
puts
puts "HTML XPath benchmarks (#{html_xpath})"
Benchmark.bm(25) do |x|
  [
    ['hpricot:html:xpath', hdoc_html],
    # Disabled: marked "Fail" by the original author (cause not recorded).
    # ['hpricot2:html:xpath', hdoc_html2],
    ['nokogiri:html:xpath', ndoc_html]
  ].each do |label, doc|
    x.report(label) do
      # Result is intentionally discarded; only the lookup time matters.
      N.times { doc.search(html_xpath).first.inner_text }
    end
  end
end
# EASY HTML CSS benchmarks: runs a short child-combinator selector N times
# against each pre-parsed HTML document and reads the text of the first match.
html_css = 'h1 > a'
puts
puts "Easy HTML CSS benchmarks (#{html_css})"
Benchmark.bm(25) do |x|
  [
    ['hpricot:html:css', hdoc_html],
    # Disabled: marked "Fail" by the original author (cause not recorded).
    # ['hpricot2:html:css', hdoc_html2],
    ['nokogiri:html:css', ndoc_html]
  ].each do |label, doc|
    x.report(label) do
      # Result is intentionally discarded; only the lookup time matters.
      N.times { doc.search(html_css).first.inner_text }
    end
  end
end
# HARD HTML CSS benchmarks: runs a longer selector (id filter plus two child
# combinators) N times against each pre-parsed HTML document and reads the
# text of the first match.
hard_html_css = 'div#logo > h1 > a'
puts
puts "Hard HTML CSS benchmarks (#{hard_html_css})"
Benchmark.bm(25) do |x|
  [
    ['hpricot:html:css', hdoc_html],
    # Disabled: marked "Fail" by the original author (cause not recorded).
    # ['hpricot2:html:css', hdoc_html2],
    ['nokogiri:html:css', ndoc_html]
  ].each do |label, doc|
    x.report(label) do
      # Result is intentionally discarded; only the lookup time matters.
      N.times { doc.search(hard_html_css).first.inner_text }
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
XML Document parsing benchmark | |
user system total real | |
hpricot:xml:doc 10.160000 0.950000 11.110000 ( 11.144462) | |
hpricot2:xml:doc 0.950000 0.000000 0.950000 ( 0.953266) | |
nokogiri:compat:doc 0.220000 0.020000 0.240000 ( 0.238401) | |
nokogiri:xml:doc 0.170000 0.030000 0.200000 ( 0.200283) | |
XML XPath benchmarks (//status/text, //user/name) | |
user system total real | |
hpricot:xml:xpath 7.580000 1.150000 8.730000 ( 8.728314) | |
hpricot2:xml:xpath 3.980000 0.530000 4.510000 ( 4.536297) | |
nokogiri:compat:xpath 0.100000 0.010000 0.110000 ( 0.103876) | |
nokogiri:xml:xpath 0.060000 0.000000 0.060000 ( 0.069590) | |
XML CSS selector benchmarks (status text, user name) | |
user system total real | |
hpricot:xml:css 7.650000 1.360000 9.010000 ( 9.035288) | |
hpricot2:xml:css 3.480000 0.660000 4.140000 ( 4.143033) | |
nokogiri:compat:css 0.100000 0.010000 0.110000 ( 0.106788) | |
nokogiri:xml:css 0.070000 0.000000 0.070000 ( 0.076784) | |
HTML Document parsing benchmark (slashdot.org) | |
user system total real | |
hpricot:html:doc 48.930000 3.640000 52.570000 ( 52.900035) | |
hpricot2:html:doc 4.500000 0.020000 4.520000 ( 4.518984) | |
nokogiri:html:doc 3.640000 0.130000 3.770000 ( 3.770642) | |
HTML XPath benchmarks (//h1/a) | |
user system total real | |
hpricot:html:xpath 8.250000 1.130000 9.380000 ( 9.468790) | |
nokogiri:html:xpath 0.060000 0.000000 0.060000 ( 0.063775) | |
Easy HTML CSS benchmarks (h1 > a) | |
user system total real | |
hpricot:html:css 6.780000 1.100000 7.880000 ( 8.137288) | |
nokogiri:html:css 0.070000 0.010000 0.080000 ( 0.135311) | |
Hard HTML CSS benchmarks (div#logo > h1 > a) | |
user system total real | |
hpricot:html:css 10.440000 1.740000 12.180000 ( 12.326511) | |
nokogiri:html:css 0.290000 0.000000 0.290000 ( 0.296762) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
For the fastest lookups, use Nokogiri's XPath selectors. Nokogiri's CSS-based search is still faster than Hpricot's, but not as fast as its XPath search.
Also note that this benchmark only shows parsing of XML (not HTML).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment