gist: 24605 Download_button fork
public
Public Clone URL: git://gist.github.com/24605.git
benchmark.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env ruby
require 'rubygems'
gem 'hpricot', '>=0.6.170'
require 'open-uri'
require 'hpricot'
 
gem 'nokogiri', '>1.0.5'
require 'nokogiri'
require 'benchmark'
 
# Download +url+ and return its body as a String.
#
# open-uri's patch that let Kernel#open accept URLs was deprecated in Ruby 2.7
# and removed in Ruby 3.0; URI.open is the supported entry point on Ruby 2.5+.
# Older Rubies expose no public URI.open, so fall back to the open-uri patched
# Kernel#open there. The block form closes the handle once the body is read.
fetch = lambda do |url|
  if URI.respond_to?(:open)
    URI.open(url) { |io| io.read }
  else
    open(url) { |io| io.read }
  end
end

# Raw documents, downloaded once and shared by every benchmark below.
xml_content = fetch.call("http://railstips.org/assets/2008/8/9/timeline.xml")
html_content = fetch.call("http://slashdot.org/")
 
# Number of iterations timed inside each report.
N = 500
 
# Documents parsed once up front so the search benchmarks time querying only.
# hdoc_html2 is referenced only by the disabled hpricot2 HTML reports.
hdoc_xml = Hpricot.XML(xml_content)
hdoc_xml2 = Hpricot.scan(xml_content)
hdoc_html = Hpricot(html_content)
hdoc_html2 = Hpricot.scan(html_content)
 
ndoc_compat = Nokogiri.Hpricot(xml_content)
ndoc_xml = Nokogiri::XML(xml_content)
ndoc_html = Nokogiri::HTML(html_content)
 
 
# XML document parsing: every report parses the same XML string N times
# through one parser entry point.
puts "XML Document parsing benchmark"
Benchmark.bm(25) do |bench|
  bench.report('hpricot:xml:doc') { N.times { Hpricot.XML(xml_content) } }
 
  bench.report('hpricot2:xml:doc') { N.times { Hpricot.scan(xml_content) } }
 
  bench.report('nokogiri:compat:doc') { N.times { Nokogiri.Hpricot(xml_content) } }
 
  bench.report('nokogiri:xml:doc') { N.times { Nokogiri::XML(xml_content) } }
end
 
xpath1 = '//status/text'
xpath2 = '//user/name'
puts
puts "XML XPath benchmarks (#{xpath1}, #{xpath2})"
# XML XPath benchmarks: each iteration runs both queries against a document
# parsed ahead of time and reads the text of the first match. The text is
# discarded, so it is not stored in a local (the original assigned it to
# variables that were never read).
Benchmark.bm(25) do |bench|
  bench.report('hpricot:xml:xpath') do
    N.times do
      hdoc_xml.search(xpath1).first.inner_text
      hdoc_xml.search(xpath2).first.inner_text
    end
  end
 
  bench.report('hpricot2:xml:xpath') do
    N.times do
      hdoc_xml2.search(xpath1).first.inner_text
      hdoc_xml2.search(xpath2).first.inner_text
    end
  end
 
  bench.report('nokogiri:compat:xpath') do
    N.times do
      ndoc_compat.search(xpath1).first.inner_text
      ndoc_compat.search(xpath2).first.inner_text
    end
  end
 
  bench.report('nokogiri:xml:xpath') do
    N.times do
      ndoc_xml.search(xpath1).first.inner_text
      ndoc_xml.search(xpath2).first.inner_text
    end
  end
end
 
# XML CSS benchmarks: same shape as the XPath run, but the queries are CSS
# selectors. Each iteration runs both selectors against a document parsed
# ahead of time and reads the text of the first match; the text is discarded,
# so it is not stored in a local.
css1 = 'status text'
css2 = 'user name'
puts
puts "XML CSS selector benchmarks (#{css1}, #{css2})"
Benchmark.bm(25) do |bench|
  bench.report('hpricot:xml:css') do
    N.times do
      hdoc_xml.search(css1).first.inner_text
      hdoc_xml.search(css2).first.inner_text
    end
  end
 
  bench.report('hpricot2:xml:css') do
    N.times do
      hdoc_xml2.search(css1).first.inner_text
      hdoc_xml2.search(css2).first.inner_text
    end
  end
 
  bench.report('nokogiri:compat:css') do
    N.times do
      ndoc_compat.search(css1).first.inner_text
      ndoc_compat.search(css2).first.inner_text
    end
  end
 
  bench.report('nokogiri:xml:css') do
    N.times do
      ndoc_xml.search(css1).first.inner_text
      ndoc_xml.search(css2).first.inner_text
    end
  end
end
 
# HTML document parsing: every report parses the downloaded HTML page N times
# through one parser entry point.
puts
puts "HTML Document parsing benchmark (slashdot.org)"
Benchmark.bm(25) do |bench|
  bench.report('hpricot:html:doc') { N.times { Hpricot(html_content) } }
 
  bench.report('hpricot2:html:doc') { N.times { Hpricot.scan(html_content) } }
 
  bench.report('nokogiri:html:doc') { N.times { Nokogiri::HTML(html_content) } }
end
 
# HTML XPath benchmarks: each iteration runs the query against a document
# parsed ahead of time and reads the text of the first match. The text is
# discarded, so it is not stored in a local.
html_xpath = '//h1/a'
puts
puts "HTML XPath benchmarks (#{html_xpath})"
Benchmark.bm(25) do |bench|
  bench.report('hpricot:html:xpath') do
    N.times do
      hdoc_html.search(html_xpath).first.inner_text
    end
  end
 
  # Disabled: marked "Fail" by the original author for the Hpricot.scan document.
  #bench.report('hpricot2:html:xpath') do
  # N.times do
  # hdoc_html2.search(html_xpath).first.inner_text
  # end
  #end
 
  bench.report('nokogiri:html:xpath') do
    N.times do
      ndoc_html.search(html_xpath).first.inner_text
    end
  end
end
 
# Easy HTML CSS benchmarks: a short child-combinator selector run against a
# document parsed ahead of time. Each iteration reads the text of the first
# match; the text is discarded, so it is not stored in a local.
html_css = 'h1 > a'
puts
puts "Easy HTML CSS benchmarks (#{html_css})"
Benchmark.bm(25) do |bench|
  bench.report('hpricot:html:css') do
    N.times do
      hdoc_html.search(html_css).first.inner_text
    end
  end
 
  # Disabled: marked "Fail" by the original author for the Hpricot.scan document.
  #bench.report('hpricot2:html:css') do
  # N.times do
  # hdoc_html2.search(html_css).first.inner_text
  # end
  #end
 
  bench.report('nokogiri:html:css') do
    N.times do
      ndoc_html.search(html_css).first.inner_text
    end
  end
end
 
# Hard HTML CSS benchmarks: a longer selector (id filter plus two child
# combinators) run against a document parsed ahead of time. Each iteration
# reads the text of the first match; the text is discarded, so it is not
# stored in a local.
hard_html_css = 'div#logo > h1 > a'
puts
puts "Hard HTML CSS benchmarks (#{hard_html_css})"
Benchmark.bm(25) do |bench|
  bench.report('hpricot:html:css') do
    N.times do
      hdoc_html.search(hard_html_css).first.inner_text
    end
  end
 
  # Disabled: marked "Fail" by the original author for the Hpricot.scan document.
  #bench.report('hpricot2:html:css') do
  # N.times do
  # hdoc_html2.search(hard_html_css).first.inner_text
  # end
  #end
 
  bench.report('nokogiri:html:css') do
    N.times do
      ndoc_html.search(hard_html_css).first.inner_text
    end
  end
end
 
Result
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
XML Document parsing benchmark
                               user system total real
hpricot:xml:doc 10.160000 0.950000 11.110000 ( 11.144462)
hpricot2:xml:doc 0.950000 0.000000 0.950000 ( 0.953266)
nokogiri:compat:doc 0.220000 0.020000 0.240000 ( 0.238401)
nokogiri:xml:doc 0.170000 0.030000 0.200000 ( 0.200283)
 
XML XPath benchmarks (//status/text, //user/name)
                               user system total real
hpricot:xml:xpath 7.580000 1.150000 8.730000 ( 8.728314)
hpricot2:xml:xpath 3.980000 0.530000 4.510000 ( 4.536297)
nokogiri:compat:xpath 0.100000 0.010000 0.110000 ( 0.103876)
nokogiri:xml:xpath 0.060000 0.000000 0.060000 ( 0.069590)
 
XML CSS selector benchmarks (status text, user name)
                               user system total real
hpricot:xml:css 7.650000 1.360000 9.010000 ( 9.035288)
hpricot2:xml:css 3.480000 0.660000 4.140000 ( 4.143033)
nokogiri:compat:css 0.100000 0.010000 0.110000 ( 0.106788)
nokogiri:xml:css 0.070000 0.000000 0.070000 ( 0.076784)
 
HTML Document parsing benchmark (slashdot.org)
                               user system total real
hpricot:html:doc 48.930000 3.640000 52.570000 ( 52.900035)
hpricot2:html:doc 4.500000 0.020000 4.520000 ( 4.518984)
nokogiri:html:doc 3.640000 0.130000 3.770000 ( 3.770642)
 
HTML XPath benchmarks (//h1/a)
                               user system total real
hpricot:html:xpath 8.250000 1.130000 9.380000 ( 9.468790)
nokogiri:html:xpath 0.060000 0.000000 0.060000 ( 0.063775)
 
Easy HTML CSS benchmarks (h1 > a)
                               user system total real
hpricot:html:css 6.780000 1.100000 7.880000 ( 8.137288)
nokogiri:html:css 0.070000 0.010000 0.080000 ( 0.135311)
 
Hard HTML CSS benchmarks (div#logo > h1 > a)
                               user system total real
hpricot:html:css 10.440000 1.740000 12.180000 ( 12.326511)
nokogiri:html:css 0.290000 0.000000 0.290000 ( 0.296762)
 
Summary
1
2
3
4
Use Nokogiri's XPath selectors for the fastest searches - Nokogiri's CSS-based search is still faster than Hpricot, but not as fast as its XPath search.
 
Also note that this benchmark only shows parsing of XML (not HTML).
 

Owner

mdalessio

Fork Of

Revisions