Skip to content

Instantly share code, notes, and snippets.

@frsyuki
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save frsyuki/7e327b5a67b534cb10c5 to your computer and use it in GitHub Desktop.
Save frsyuki/7e327b5a67b534cb10c5 to your computer and use it in GitHub Desktop.
require 'msgpack'
# Describes the column layout of a page as an anonymous Struct class and
# hands out PageBuilder instances bound to that layout.
class PageSchema
  # @param keys [Array<Symbol>] member names, passed straight to Struct.new
  def initialize(*keys)
    @struct = Struct.new(*keys)
    @struct.class_eval do
      # Serializes the record as a MessagePack map of member name => value.
      #
      # @param arg [MessagePack::Packer, Object, nil] an existing packer to
      #   append to; anything else is handed to MessagePack::Packer.new
      # @return [MessagePack::Packer] the packer that was written to
      def to_msgpack(arg = nil)
        packer = arg.is_a?(MessagePack::Packer) ? arg : MessagePack::Packer.new(arg)
        # Every member contributes a key AND a value, so the container must be
        # a map with one entry per member. An array header of the same size
        # would announce only half of the objects written below.
        packer.write_map_header(size)
        each_pair { |key, value| packer.write(key).write(value) }
        packer
      end
    end
  end

  # @return [PageBuilder] a new builder bound to this schema's Struct class
  def new_page_builder
    PageBuilder.new(@struct)
  end
end
# Accumulates (time, record) pairs into a MessagePack packer and wraps the
# packed bytes in a PageEventStream.
class PageBuilder
  # The object handed to the constructor; it is forwarded unchanged to the
  # PageEventStream created by #build.
  attr_reader :struct

  # @param struct [Object] record class for this page (the benchmark passes
  #   the Struct class created by PageSchema)
  def initialize(struct)
    @packer = MessagePack::Packer.new
    @struct = struct
  end

  # Appends one event: the time first, then the record's values as one array.
  #
  # @param time [Object] timestamp written as-is
  # @param struct [#values] record whose values are written as an array
  # @return [Object] whatever the final packer write returns
  def add(time, struct)
    @packer.write(time)
    member_values = struct.values
    @packer.write(member_values)
  end

  # @return [PageEventStream] stream over everything packed so far
  def build
    packed = @packer.buffer
    PageEventStream.new(@struct, packed)
  end
end
# Replays the (time, record) pairs packed by PageBuilder, rebuilding each
# record as an instance of the given Struct class.
class PageEventStream
  # @param struct [Class] class whose .new receives each record's values
  # @param buffer [#to_s] packed page data (PageBuilder passes the packer's
  #   buffer); its bytes are copied here so iteration never drains the source
  def initialize(struct, buffer)
    @struct = struct
    @data = buffer.to_s
  end

  # Yields every event in the page. A fresh Unpacker is fed the stored bytes
  # on each call, so the stream can be traversed any number of times and the
  # loop ends when the unpacker itself runs out of objects.
  #
  # @yieldparam time [Object] the unpacked timestamp
  # @yieldparam record [Struct] frozen record built from the unpacked values
  # @return [nil]
  def each(&block)
    unpacker = MessagePack::Unpacker.new
    unpacker.feed(@data)
    # Objects were written as alternating time / values-array, so they are
    # consumed two at a time.
    unpacker.to_enum(:each).each_slice(2) do |time, values|
      yield time, @struct.new(*values).freeze # freeze to be compatible with Hash
    end
    nil
  end
end
# In-memory event stream that keeps times and records in two parallel arrays.
class MultiEventStream
  def initialize
    @record_array = []
    @time_array = []
  end

  # Appends one event; the time and the record land at the same index.
  #
  # @param time [Object] timestamp of the event
  # @param record [Object] payload of the event
  # @return [Array] the internal record array (result of the final append)
  def add(time, record)
    @time_array.push(time)
    @record_array.push(record)
  end

  # Calls the block with (time, record) for every stored event, in insertion
  # order.
  #
  # @return [nil]
  def each(&block)
    times = @time_array
    records = @record_array
    # The event count is read once, before the first callback runs.
    times.length.times do |index|
      block.call(times[index], records[index])
    end
    nil
  end
end
require 'benchmark'
# Benchmark parameters: each "build" report constructs LOOP containers of
# BATCH records; every record has NKEYS columns and the same timestamp TIME.
LOOP = 5000
BATCH = 100
NKEYS = 20
TIME = Time.now.to_i

# Exclusive ranges, so exactly NKEYS columns (col0 .. col19) are generated.
# An inclusive 0..NKEYS would produce NKEYS + 1 of them.
vals = (0...NKEYS).map { |i| "val#{i}" }
keys = (0...NKEYS).map { |i| "col#{i}" }

# The "read" reports iterate whatever the "build" reports assigned last.
page = nil
multi = nil

Benchmark.bmbm do |x|
  x.report("build PageBuilder") do
    schema = PageSchema.new(*keys.map(&:to_sym))
    LOOP.times do
      b = schema.new_page_builder
      BATCH.times do
        b.add(TIME, b.struct.new(*vals))
      end
      page = b.build
    end
  end

  x.report("build MultiEventStream") do
    LOOP.times do
      b = MultiEventStream.new
      BATCH.times do
        # A new Hash per record is part of the work being measured.
        b.add(TIME, Hash[keys.zip(vals)])
      end
      multi = b
    end
  end

  x.report("read PageBuilder") do
    LOOP.times do
      page.each { |_time, record| record['col1'] }
    end
  end

  x.report("read MultiEventStream") do
    LOOP.times do
      multi.each { |_time, record| record['col1'] }
    end
  end
end
=begin
LOOP = 5000
BATCH = 100
user system total real
build PageBuilder 0.790000 0.060000 0.850000 ( 0.851907)
build MultiEventStream 4.700000 0.050000 4.750000 ( 4.754573)
read PageBuilder 0.000000 0.000000 0.000000 ( 0.002188)
read MultiEventStream 0.180000 0.000000 0.180000 ( 0.176825)
=end
=begin
LOOP = 50000
BATCH = 10
user system total real
build PageBuilder 0.980000 0.140000 1.120000 ( 1.120893)
build MultiEventStream 4.450000 0.050000 4.500000 ( 4.513649)
read PageBuilder 0.040000 0.010000 0.050000 ( 0.038152)
read MultiEventStream 0.210000 0.000000 0.210000 ( 0.207248)
=end
=begin
LOOP = 500000
BATCH = 1
user system total real
build PageBuilder 2.970000 1.030000 4.000000 ( 4.019263)
build MultiEventStream 4.470000 0.080000 4.550000 ( 4.552731)
read PageBuilder 0.340000 0.010000 0.350000 ( 0.346582)
read MultiEventStream 0.580000 0.020000 0.600000 ( 0.593196)
=end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment