# This is the test I use to check the performance of the extension (I use version 0.14.1 of the driver).
require 'rubygems'
require 'mongo'
require 'mongo/gridfs'
host = '127.0.0.1'
port = 27017
db = Mongo::Connection.new(host, port).db('test')
# Don't be misled by the file used for the benchmark - I run this with plain Ruby 1.8 :)
image_data = File.open("../../jruby-bin-1.2.0.tar.gz", "rb") { |f| f.read }
# The file I require here contains the extension (see the other listing in this gist).
require 'gridfs_fix.rb'
# You may play around with the chunk size, but it does not considerably
# impact the performance (see the commented-out sketch after the write block below).
GridFS::GridStore.open(db, "a_file", 'w', :chunk_size => 2000000) {|f|
puts "Using chunk size = " + f.chunk_size.to_s
time_start = Time.now
# Now, exchange the two lines below to use the standard or "hacked" GridFS support :)
f.fast_write(image_data)
#f.write(image_data)
puts "Writing in " + (Time.now - time_start).to_s + " - mean transfer: " + ((image_data.length / 1024)/(Time.now - time_start)).to_s + " kB/s."
}
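# If you want to verify the chunk-size claim yourself, here is a quick sketch
# (my addition, not part of the original benchmark) - it just re-runs the write
# benchmark with a few different chunk sizes. Left commented out so the timings
# above are not affected:
#
#   [256 * 1024, 1_000_000, 4_000_000].each do |size|
#     GridFS::GridStore.open(db, "a_file", 'w', :chunk_size => size) do |f|
#       start = Time.now
#       f.fast_write(image_data)
#       puts "chunk_size=#{size}: #{Time.now - start} s"
#     end
#   end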
GridFS::GridStore.open(db, "a_file", 'r') { |f|
  time_start = Time.now
  # Now, swap the two lines below to use the standard or "hacked" GridFS support :)
  new_data = f.fast_read()
  #new_data = f.read()
  elapsed = Time.now - time_start
  puts "Reading in " + elapsed.to_s + " s - mean transfer: " + ((new_data.length / 1024) / elapsed).to_s + " kB/s."
  # Let's also store the file so we can check that everything was written/read
  # correctly (see the round-trip check after this block).
  File.open("jruby.tgz", "wb") { |out| out.write(new_data) }
}
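# Optional round-trip check (my addition, not part of the original gist):
# compare the re-read copy on disk with the data we originally wrote.
require 'digest/md5'
copy = File.open("jruby.tgz", "rb") { |f| f.read }
if Digest::MD5.hexdigest(copy) == Digest::MD5.hexdigest(image_data)
  puts "Round-trip check OK - the copy matches the original data."
else
  puts "Round-trip check FAILED - the copy differs from the original data."
end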
# The main idea here is to bypass the standard way the Ruby driver handles data to be
# inserted into GridFS (that is, a byte-by-byte approach). Instead, I write
# entire chunks right away.
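# For contrast, the stock GridStore#write in the 0.14-era driver pushes data one
# byte at a time - roughly the following (quoted from memory, so treat it as an
# illustration rather than the exact driver source):
#
#   def write(string)
#     count = 0
#     string.each_byte { |byte|
#       self.putc(byte)    # appends a single byte, saving the chunk when it fills up
#       count += 1
#     }
#     count
#   end
#
# fast_write below copies whole chunk-sized slices instead, so the per-byte
# method-call overhead disappears and only the chunk saves remain.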
class GridFS::GridStore
  require 'mongo/util/byte_buffer'
  require 'mongo/gridfs/chunk'

  def fast_write(string)
    raise "#@filename not opened for write" unless @mode[0] == ?w
    #p "WRITE. " + (string.length / @chunk_size).to_s + " full chunks of " + @chunk_size.to_s + " B each plus " + (string.length % @chunk_size).to_s + " B rest."
    to_write = string.length
    while (to_write > 0) do
      step_size = (to_write > @chunk_size) ? @chunk_size : to_write
      @curr_chunk.data.put_array(ByteBuffer.new(string[-to_write, step_size]).to_a)
      to_write -= step_size
      if (to_write > 0) then
        prev_chunk_number = @curr_chunk.chunk_number
        # this is the bottleneck of the current solution - it takes the most time ...
        # ... but that is nothing surprising, since this is the actual data transfer code
        @curr_chunk.save
        @curr_chunk = GridFS::Chunk.new(self, 'n' => prev_chunk_number + 1)
      end
    end
    # Mimic GridStore#write and return the number of bytes written.
    string.length - to_write
  end

  def fast_read()
    buf = ""
    while true do
      # Append each chunk's data in one go instead of byte by byte.
      buf += @curr_chunk.data.to_s
      break if @curr_chunk.chunk_number == last_chunk_number
      @curr_chunk = nth_chunk(@curr_chunk.chunk_number + 1)
    end
    buf
  end
end