Skip to content

Instantly share code, notes, and snippets.

@billdueber
Created September 28, 2010 17:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save billdueber/601397 to your computer and use it in GitHub Desktop.
Save billdueber/601397 to your computer and use it in GitHub Desktop.
# Code to benchmark various serializations of MARC records using ruby-marc
# Not included is XML -- serialization using ruby-marc is ridiculously slow and the
# filesizes are bigger than anything else. Even with the lib-xml reader,
# deserialization is also relatively slow
#
# I didn't bother to benchmark json/pure in later runs because it's just so damn
# slow that it would never be a good choice.
#
# My results can be found at http://robotlibrarian.billdueber.com/sizespeed-of-various-marc-serializations-using-ruby-marc/
require 'marc'
require 'rubygems'
require 'benchmark'
require 'yajl'
require 'msgpack'
# Benchmark state and main loop.
#
# Every record read from topics.xml is serialized and then deserialized with
# each format (binary MARC, MessagePack, and JSON via YAJL). The cost of each
# step is accumulated into its own Benchmark::Tms so the totals can be
# compared afterwards. The whole file is processed `iterations` times.

# Byte totals per format; only touched by the (currently disabled) size
# bookkeeping at the bottom of the loop.
jsonsize = 0.0
marcsize = 0.0
mpsize = 0.0

# Accumulators for Benchmark.measure results.
# Prefix "s" = serialize, "d" = deserialize.
# sjptime = Benchmark::Tms.new(0,0,0,0,0, "JSON Pure")
sytime = Benchmark::Tms.new(0, 0, 0, 0, 0, "YAJL")
smtime = Benchmark::Tms.new(0, 0, 0, 0, 0, "MARC")
smptime = Benchmark::Tms.new(0, 0, 0, 0, 0, "Msgpack")
dytime = Benchmark::Tms.new(0, 0, 0, 0, 0, "YAJL")
# djptime = Benchmark::Tms.new(0,0,0,0,0, "JSON/Pure")
dmtime = Benchmark::Tms.new(0, 0, 0, 0, 0, "MARC")
dmptime = Benchmark::Tms.new(0, 0, 0, 0, 0, "Msgpack")

# Number of records processed in one pass over the input file. This is kept
# in its own variable because a block parameter never writes to an outer
# local of the same name, so the loop index cannot be read after the loop.
record_count = 0
iterations = 20

iterations.times do
  # A fresh reader per pass; the XML parsing itself is outside every
  # Benchmark.measure block and therefore not part of any reported time.
  reader = MARC::XMLReader.new('topics.xml', :parser => 'libxml')
  records_this_pass = 0

  reader.each_with_index do |r, index|
    # Declared here so the serialized forms assigned inside the measure
    # blocks are visible to the matching deserialization step.
    marc = nil
    json = nil
    mp = nil

    smtime += Benchmark.measure { marc = r.to_marc }
    dmtime += Benchmark.measure { MARC::Record.new_from_marc(marc) }

    smptime += Benchmark.measure { mp = MessagePack.pack(r.to_hash) }
    dmptime += Benchmark.measure { MARC::Record.new_from_hash(MessagePack.unpack(mp)) }

    sytime += Benchmark.measure { json = Yajl::Encoder.encode(r.to_hash) }
    dytime += Benchmark.measure { MARC::Record.new_from_hash(Yajl::Parser.parse(json)) }

    # sjptime += Benchmark.measure {json = JSON.generate(r.to_hash)}
    # djptime += Benchmark.measure {copy = MARC::Record.new_from_hash(JSON.parse(json))}

    records_this_pass = index + 1

    # marcsize += marc.size
    # jsonsize += json.size
    # mpsize += mp.size
    # break if index > 1000
  end

  record_count = records_this_pass
end

puts "Total of #{record_count} records run #{iterations} times"
# Report the accumulated timings. Each section prints one row per format:
# label, total seconds, and that total as a percentage of the MARC figure
# for the same section.
row_format = ' %-15s %8.2f s (%3.0f%%)'

# Prints a heading followed by one formatted row per [label, seconds] pair.
# The first row (always MARC) is the 100% reference for the section.
print_section = lambda do |heading, rows|
  reference = rows.first.last
  puts heading
  rows.each do |label, seconds|
    puts row_format % [label, seconds, seconds / reference * 100]
  end
end

print_section.call("\nSERIALIZING", [
  ['MARC', smtime.total],
  ['YAJL', sytime.total],
  ['Msgpack', smptime.total]
  # ['Json Pure', sjptime.total]
])

print_section.call("\nDESERIALIZING", [
  ['MARC', dmtime.total],
  ['YAJL', dytime.total],
  ['Msgpack', dmptime.total]
  # ['Json Pure', djptime.total]
])

print_section.call("\nSERIALIZE + DESERIALIZE", [
  ['MARC', dmtime.total + smtime.total],
  ['YAJL', dytime.total + sytime.total],
  ['Msgpack', dmptime.total + smptime.total]
  # ['Json Pure', djptime.total + sjptime.total]
])

# Size report, disabled together with the size bookkeeping in the main loop.
# puts "\nSIZE"
# puts ' %-15s %8.2f MBytes (%3.0f%%)' % ['MARC', marcsize / (1024.0 * 1024), marcsize / (1.0 * marcsize) * 100]
# puts ' %-15s %8.2f MBytes (%3.0f%%)' % ['Msgpack', mpsize / (1024.0 * 1024), mpsize / (1.0 * marcsize) * 100]
# puts ' %-15s %8.2f MBytes (%3.0f%%)' % ['JSON', jsonsize / (1024.0 * 1024), jsonsize / (1.0 * marcsize) * 100]
# puts ' %-15s %8.2f MBytes (%3.0f%%)' % ['XML', 97958300 / (1024.0 * 1024), 97958300.0 / marcsize * 100]
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment