Skip to content

Instantly share code, notes, and snippets.

@codingfoo
Last active December 11, 2015 20:59
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codingfoo/4659616 to your computer and use it in GitHub Desktop.
Save codingfoo/4659616 to your computer and use it in GitHub Desktop.
Benchmark SAX vs. DOM parsing with Nokogiri
require 'nokogiri'
# SAX handler that re-serializes the events it receives into one string
# (see #result). For <a> and <link> elements, an href attribute whose value
# starts with "http" is upper-cased before being written out.
#
# Attribute values are written verbatim between double quotes; no escaping
# is applied.
class MyDoc < Nokogiri::XML::SAX::Document
  # Element names whose href attribute is rewritten.
  LINK_ELEMENTS = %w[a link].freeze

  # @return [String] the markup accumulated so far
  attr_reader :result

  def initialize
    @result = ""
  end

  # Appends the pieces of the XML declaration to the result.
  #
  # Any of the three values may be nil when the declaration omits it, so
  # each one is converted with to_s (nil becomes "") instead of being
  # appended directly, which would raise TypeError.
  def xmldecl version, encoding, standalone
    [version, encoding, standalone].each { |part| @result << part.to_s }
  end

  # Appends the comment text (without comment delimiters).
  def comment string
    @result << string
  end

  # Appends an opening tag for +name+ with its attributes.
  #
  # @param name [String] element name
  # @param attributes [Array<Array(String, String)>] name/value pairs
  def start_element name, attributes = []
    attrs = attributes.to_h
    if LINK_ELEMENTS.include?(name)
      href = attrs['href']
      # A missing or valueless href is left untouched.
      attrs['href'] = href.upcase if href && href.start_with?('http')
    end

    serialized = attrs.map { |key, value| "#{key}=\"#{value}\"" }.join(' ')
    @result << (serialized.empty? ? "<#{name}>" : "<#{name} #{serialized}>")
  end

  # Appends the closing tag for +name+.
  def end_element name
    @result << "</#{name}>"
  end

  # Appends the CDATA content (without the CDATA markers).
  def cdata_block string
    @result << string
  end

  # Appends character data as received.
  def characters str
    @result << str
  end

  # Processing instructions are ignored.
  def processing_instruction name, content
  end

  # Intentionally empty.
  # NOTE(review): Nokogiri's default implementation of this callback forwards
  # to #start_element; overriding it with an empty body suppresses that for
  # namespace-aware (XML) parsing. Confirm this is intended.
  def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
  end

  # Parser warnings are ignored.
  def warning(string)
  end

  # Parser errors are ignored.
  def error(string)
  end
end
require 'open-uri'
require "benchmark"
include Benchmark
# Number of parse iterations timed inside each report.
N = 1
# Percentage difference in wall-clock time of SAX relative to DOM, one entry
# per successfully benchmarked URL.
diffs = []

# urls.txt holds one URL per line. File.foreach closes the file once the
# iteration is done, unlike File.open without a block.
File.foreach("urls.txt") do |line|
  # Drop the trailing newline; skip blank lines.
  url = line.strip
  next if url.empty?

  begin
    # Kernel#open no longer fetches URLs on Ruby 3.0+; open-uri's URI.open
    # does. The block form closes the stream after reading.
    str = URI.open(url, &:read)
    Benchmark.benchmark(Benchmark::CAPTION, 7, Benchmark::FORMAT, ">% Change:") do |x|
      t_sax = x.report("SAX") do
        N.times do
          parser = Nokogiri::HTML::SAX::Parser.new(MyDoc.new)
          parser.parse(str)
        end
      end
      t_dom = x.report("DOM") do
        N.times do
          document = Nokogiri::HTML(str)
          # Mirror the SAX handler: rewrite href on both <a> and <link>, and
          # decide based on the element's own href rather than the page URL.
          document.css('a[href], link[href]').each do |element|
            href = element['href']
            element['href'] = href.upcase if href.start_with?('http')
          end
          document.to_s
        end
      end
      sax_real = t_sax.real
      dom_real = t_dom.real
      diffs << ((sax_real - dom_real) / dom_real) * 100
      # The block's return value supplies the extra ">% Change:" row.
      [((t_sax - t_dom) / t_dom) * 100]
    end
  rescue StandardError => e
    # Best effort: report the failure for this URL and continue with the next.
    puts e, e.backtrace
  end
end
puts 'end'
diffs.each { |diff| puts "#{diff}\n" }
http://www.ruby-doc.org/
http://en.wikipedia.org/wiki/Pickling
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment