Last active
December 22, 2015 00:39
-
-
Save pedrocr/6391196 to your computer and use it in GitHub Desktop.
Comparing sax-machine and xmlcodec to parse an Atom feed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby

# Atom feed used as the sample document for both parsers.
ONGOING_URL = "https://www.tbray.org/ongoing/ongoing.atom"

# For some reason net/http can't download the feed properly, so shell out to
# curl instead. The argv-array form runs curl directly (no shell ever parses
# the URL), and the block form closes the pipe and reaps the child process as
# soon as the body has been read.
XML_TEXT = IO.popen(["curl", "-s", ONGOING_URL], &:read)
## First with sax-machine
require 'sax-machine' | |
# sax-machine mapping for an Atom <content> element.
class SM_AtomContent
  include SAXMachine

  # The element's `type` attribute.
  attribute :type
  # The element's own value, read back through the `text` accessor.
  value :text
end
# sax-machine mapping for one feed entry; only its content is extracted.
class SM_AtomEntry
  include SAXMachine

  # The <content> child is parsed into an SM_AtomContent object.
  element :content, class: SM_AtomContent
end
# sax-machine mapping for the feed itself: every <entry> element is parsed as
# an SM_AtomEntry and collected under `entries`.
class SM_Atom
  include SAXMachine

  elements :entry, as: :entries, class: SM_AtomEntry
end
# Parse the feed once and print the start of the first entry's content.
feed = SM_Atom.parse(XML_TEXT)
# sax-machine strips away the HTML tags
sm_preview = feed.entries.first.content.text[0..20]
puts "sax-machine says: #{sm_preview.inspect}"
## The same with xmlcodec
require 'xmlcodec' | |
# Base class shared by all xmlcodec element classes below; it declares the
# 'Atom' xmlformat and is also the entry point used for importing a document.
class XC_Atom < XMLCodec::XMLElement
  xmlformat 'Atom'
end
# xmlcodec mapping for the <feed> element.
class XC_Feed < XC_Atom
  elname 'feed'

  # 'entry' is declared as a multiple subelement (read back as `feed.entry[i]`).
  xmlsubel_mult 'entry'
end
# xmlcodec mapping for an <entry> element; only its content is extracted.
class XC_Entry < XC_Atom
  elname 'entry'

  # Single <content> subelement, read back as `entry.content`.
  xmlsubel :content
end
# xmlcodec mapping for a <content> element.
class XC_Content < XC_Atom
  elname 'content'

  # This is what says "process the subtree of this element as if it was text";
  # the result is read back through `value`.
  elallvalue
end
# Import the same document with xmlcodec and print the same preview.
feed = XC_Atom.import_xml(XML_TEXT)
# This actually returns the HTML content
xc_preview = feed.entry[0].content.value[0..20]
puts " xmlcodec says: #{xc_preview.inspect}"
require 'benchmark' | |
# Number of times each library parses the whole feed.
BENCH_ITER = 1000

# Time BENCH_ITER complete parses of the same document with each library.
# The argument to `bm` is the width reserved for the row labels.
Benchmark.bm(10) do |bench|
  bench.report("sax-machine:") do
    BENCH_ITER.times { feed = SM_Atom.parse(XML_TEXT) }
  end

  bench.report(" xmlcodec:") do
    BENCH_ITER.times { feed = XC_Atom.import_xml(XML_TEXT) }
  end
end
# On my machine xmlcodec is around 10% faster than sax-machine, and neither of
# them seems to use too much memory (not shown here).
#
#                   user     system      total        real
# sax-machine:  34.880000   0.030000  34.910000 ( 35.064932)
#    xmlcodec:  30.970000   0.010000  30.980000 ( 31.024513)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment