Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Benchmarking ways of extracting a single value from a large YAML map (e.g. ActiveRecord serialized data)
require 'yaml'
require 'benchmark'
# Builds a flat YAML mapping document as a String.
#
# The document has +num_entries+ lines of the form
# "something_<n>: <random float>\n", where n takes every value in
# 0...num_entries exactly once, in shuffled order.
def make_yaml_string(num_entries = 500)
  (0...num_entries).to_a.shuffle.each_with_object(+'') do |index, yaml|
    yaml << "something_#{index}: #{Random.rand}\n"
  end
end
# Fixture: 500 generated YAML documents, shared by every benchmark in this script.
puts 'Build a lot of big YAML strings to parse ...'
yaml_strings = 500.times.map { make_yaml_string }

# Baseline: fully deserialize each document with YAML.load, then read one key
# from the resulting object.
puts '', 'Extract a specific value using YAML.load ...'
last_extracted_value = 'n/a'
bench = Benchmark.measure do
  yaml_strings.each { |doc| last_extracted_value = YAML.load(doc)['something_99'] }
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:", bench
# ===== OUTPUT =====
# Extract a specific value using YAML.load ...
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 4.800000 0.020000 4.820000 ( 4.813697)
# The Psych handler to extract the value for the "something_99" key.
#
# Fed parser events by YAML::Parser, it stores in #something_99 the scalar
# value that follows the "something_99" key of a mapping at mapping depth 1.
# The value is stored exactly as it is passed to #scalar; no conversion is
# applied here.
#
# Only scalars that sit directly inside that depth-1 mapping are treated as
# keys or values. Scalars inside a sequence are ignored, and an alias counts
# as one opaque node, so neither disturbs the key/value alternation.
class ExtractionHandler < YAML::Handler
  # Raised from #scalar right after the value is captured, but only when
  # bail_when_done is truthy. Callers rescue it to stop parsing early.
  class DoneExtracting < StandardError; end

  # The mapping key whose value is extracted.
  TARGET_KEY = 'something_99'

  # something_99:   the captured value, or nil if the key was not seen.
  # bail_when_done: when truthy, #scalar raises DoneExtracting after capturing.
  attr_accessor :something_99, :bail_when_done

  # is_key:          true when the next node in the tracked mapping is a key.
  # is_something_99: true when the most recent key equalled TARGET_KEY.
  attr_accessor :is_key, :is_something_99

  attr_writer :depth

  # Number of currently open mappings (sequences are not counted); lazily 0.
  def depth
    @depth ||= 0
  end

  def start_mapping(*)
    self.depth += 1
    open_collections.push(:mapping)
    self.is_key = true
  end

  def end_mapping(*)
    self.depth -= 1
    open_collections.pop
    # A nested mapping just closed inside the tracked one: a key comes next.
    self.is_key = true if in_tracked_mapping?
  end

  def start_sequence(*)
    open_collections.push(:sequence)
  end

  def end_sequence(*)
    open_collections.pop
    # The sequence was an entry of the tracked mapping: a key comes next.
    self.is_key = true if in_tracked_mapping?
  end

  # An alias occupies a key or value slot but carries no scalar text, so it
  # only advances the key/value alternation and is never captured.
  def alias(*)
    return unless in_tracked_mapping?

    self.is_something_99 = false if is_key
    self.is_key = !is_key
  end

  # Handles a scalar node; +value+ is the scalar's text.
  # Raises DoneExtracting after capturing when bail_when_done is truthy.
  def scalar(value, *)
    return unless in_tracked_mapping?

    if is_key
      self.is_something_99 = (value == TARGET_KEY)
    elsif is_something_99
      self.something_99 = value
      raise DoneExtracting if bail_when_done
    end
    self.is_key = !is_key
  end

  private

  # Stack of :mapping / :sequence markers for the collections currently open.
  def open_collections
    @open_collections ||= []
  end

  # True when the innermost open collection is the single open mapping, i.e.
  # the next node is a direct key or value of the mapping being inspected.
  def in_tracked_mapping?
    depth == 1 && open_collections.last == :mapping
  end
end
# Streaming run: feed each document through YAML::Parser with a fresh
# ExtractionHandler, without building the full data structure.
# bail_when_done is left unset here.
puts '', 'Extract a specific value using YAML parser & custom handler'
last_extracted_value = 'n/a'
bench = Benchmark.measure do
  yaml_strings.each do |doc|
    handler = ExtractionHandler.new
    begin
      YAML::Parser.new(handler).parse(doc)
    rescue ExtractionHandler::DoneExtracting
      # Swallowed deliberately: this error only signals an early stop.
    end
    last_extracted_value = handler.something_99
  end
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:", bench
# ===== OUTPUT =====
# Extract a specific value using YAML parser & custom handler
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 1.080000 0.010000 1.090000 ( 1.089758)
# Streaming run with early exit: bail_when_done is switched on, and the
# DoneExtracting error raised during parsing is rescued so the loop can
# move on to the next document.
puts '', 'Extract a specific value using YAML parser & custom handler and bail-when-done'
last_extracted_value = 'n/a'
bench = Benchmark.measure do
  yaml_strings.each do |doc|
    handler = ExtractionHandler.new.tap { |eh| eh.bail_when_done = true }
    begin
      YAML::Parser.new(handler).parse(doc)
    rescue ExtractionHandler::DoneExtracting
      # Expected: signals that parsing was stopped early on purpose.
    end
    last_extracted_value = handler.something_99
  end
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:", bench
# ===== OUTPUT =====
# Extract a specific value using YAML parser & custom handler and bail-when-done
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 0.540000 0.000000 0.540000 ( 0.547873)
# Regexp run: no YAML parsing at all. Match the "something_99" line directly
# in the raw text and take the first capture group as the value.
puts '', 'Extract a specific value using regular expression'
last_extracted_value = 'n/a'
regexp = /^ *something_99\s*:\s*(.*)\s*$/
bench = Benchmark.measure do
  yaml_strings.each { |doc| last_extracted_value = regexp.match(doc)[1] }
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:", bench
# ===== OUTPUT =====
# Extract a specific value using regular expression
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 0.100000 0.000000 0.100000 ( 0.097476)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.