Last active
August 29, 2015 13:57
-
-
Save stevecj/9772994 to your computer and use it in GitHub Desktop.
Benchmarking ways of extracting a single value from a large YAML map (e.g. ActiveRecord serialized data)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'yaml' | |
require 'benchmark' | |
# Builds a flat YAML mapping document as a String.
#
# The document has one "something_<n>: <random float>" line for every n in
# 0...num_entries, emitted in shuffled order so that the position of any
# particular key is unpredictable.
#
# num_entries - number of key/value lines to generate (default 500).
#
# Returns the YAML text; an empty String when num_entries is 0.
def make_yaml_string(num_entries=500)
  shuffled_nums = (0...num_entries).to_a.shuffle
  shuffled_nums.each_with_object(''.dup) do |n, yaml|
    yaml << "something_#{n}: #{Random.rand}\n"
  end
end
# Shared fixture for every benchmark below: 500 generated YAML documents
# (each with make_yaml_string's default number of entries).
puts 'Build a lot of big YAML strings to parse ...'
yaml_strings = Array.new(500) { make_yaml_string }

# Baseline: fully parse each document into a Hash with YAML.load, then look
# up the one key of interest.
puts '', 'Extract a specific value using YAML.load ...'
last_extracted_value = 'n/a'
bench = Benchmark.measure do
  yaml_strings.each do |doc|
    last_extracted_value = YAML.load(doc)['something_99']
  end
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:"
puts bench

# ===== OUTPUT =====
# Extract a specific value using YAML.load ...
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 4.800000 0.020000 4.820000 ( 4.813697)
# The Psych handler to extract the value for the "something_99" key.
#
# It listens to the parser's event stream and records the scalar value that
# is paired with the "something_99" key in a document's top-level mapping,
# without building the full object graph.
#
# Only scalar values are captured. If the key's value is a nested mapping,
# a sequence or an alias, +something_99+ is left unchanged. Mappings that are
# not the document's top-level node (including mappings inside a top-level
# sequence) are ignored.
#
# Set +bail_when_done+ to a truthy value to abort parsing (by raising
# DoneExtracting) as soon as the value has been captured.
class ExtractionHandler < YAML::Handler
  # Raised from #scalar to stop the parser early once the value is captured.
  # Callers that enable +bail_when_done+ are expected to rescue it.
  class DoneExtracting < StandardError ; end

  # something_99   - the extracted scalar (a String as emitted by the
  #                  parser), or nil if not found.
  # bail_when_done - when truthy, raise DoneExtracting right after capture.
  attr_accessor :something_99, :bail_when_done

  # Parser-state bookkeeping, kept public for backward compatibility.
  # is_key          - true when the next top-level node is a mapping key.
  # is_something_99 - true when the most recent top-level key matched.
  attr_accessor :is_key, :is_something_99

  attr_writer :depth

  # Current container nesting level (mappings and sequences); 0 outside any
  # container.
  def depth
    @depth ||= 0
  end

  def start_mapping(*)
    self.depth += 1
    return unless depth == 1
    @top_level_is_mapping = true
    self.is_key = true
  end

  def end_mapping(*)
    self.depth -= 1
    finish_non_scalar_node
  end

  # Sequences count as nesting so that their elements are never mistaken for
  # keys or values of the top-level mapping.
  def start_sequence(*)
    self.depth += 1
    @top_level_is_mapping = false if depth == 1
  end

  def end_sequence(*)
    self.depth -= 1
    finish_non_scalar_node
  end

  # An alias occupies a key or value slot just like a scalar does, so it must
  # advance the key/value toggle too.
  def alias(*)
    finish_non_scalar_node
  end

  def scalar(value, *args)
    return unless in_top_level_mapping?
    if is_key
      self.is_something_99 = (value == 'something_99')
    elsif is_something_99
      self.something_99 = value
      raise DoneExtracting if bail_when_done
    end
    self.is_key = !is_key
  end

  private

  # True only while the parser is positioned directly inside the document's
  # top-level mapping.
  def in_top_level_mapping?
    depth == 1 && @top_level_is_mapping
  end

  # Accounts for a nested mapping, sequence or alias that just filled a key
  # or value slot of the top-level mapping.
  def finish_non_scalar_node
    return unless in_top_level_mapping?
    # A non-scalar key can never be "something_99"; clear any stale match so
    # the value that follows is not captured by mistake.
    self.is_something_99 = false if is_key
    self.is_key = !is_key
  end
end
# Event-based approach: stream each document through the parser with an
# ExtractionHandler and read the captured value afterwards. bail_when_done is
# left unset here, so every document is parsed to the end.
puts '', 'Extract a specific value using YAML parser & custom handler'
last_extracted_value = 'n/a'
bench = Benchmark.measure do
  yaml_strings.each do |doc|
    handler = ExtractionHandler.new
    parser = YAML::Parser.new(handler)
    begin
      parser.parse(doc)
    rescue ExtractionHandler::DoneExtracting
      # Early-exit signal from the handler; nothing to do.
    end
    last_extracted_value = handler.something_99
  end
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:"
puts bench

# ===== OUTPUT =====
# Extract a specific value using YAML parser & custom handler
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 1.080000 0.010000 1.090000 ( 1.089758)
# Same event-based approach, but with bail_when_done enabled: the handler
# raises DoneExtracting once the value is captured, so the rest of the
# document is never parsed.
puts '', 'Extract a specific value using YAML parser & custom handler and bail-when-done'
last_extracted_value = 'n/a'
bench = Benchmark.measure do
  yaml_strings.each do |doc|
    handler = ExtractionHandler.new.tap { |eh| eh.bail_when_done = true }
    parser = YAML::Parser.new(handler)
    begin
      parser.parse(doc)
    rescue ExtractionHandler::DoneExtracting
      # Expected: the handler stopped the parse after capturing the value.
    end
    last_extracted_value = handler.something_99
  end
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:"
puts bench

# ===== OUTPUT =====
# Extract a specific value using YAML parser & custom handler and bail-when-done
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 0.540000 0.000000 0.540000 ( 0.547873)
# Text-matching approach: skip YAML parsing entirely and pull the value out
# of the raw document with a line-anchored regular expression. The first
# capture group holds everything after "something_99:" on that line.
puts '', 'Extract a specific value using regular expression'
last_extracted_value = 'n/a'
regexp = /^ *something_99\s*:\s*(.*)\s*$/
bench = Benchmark.measure do
  yaml_strings.each do |doc|
    last_extracted_value = regexp.match(doc)[1]
  end
end
puts "Last value extracted: #{last_extracted_value}", "Benchmark results:"
puts bench

# ===== OUTPUT =====
# Extract a specific value using regular expression
# Last value extracted: 0.06597329553649589
# Benchmark results:
# 0.100000 0.000000 0.100000 ( 0.097476)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment