Skip to content

Instantly share code, notes, and snippets.

@ahoward
Created April 17, 2012 16:42
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ahoward/2407396 to your computer and use it in GitHub Desktop.
Save ahoward/2407396 to your computer and use it in GitHub Desktop.
implementation of grid_fs on top of mongoid
require "mime/types"
require "digest/md5"
require "cgi"
### see: http://www.mongodb.org/display/DOCS/GridFS+Specification
class GridFS
##
#
# Per-instance state, all assigned in #initialize: the collection prefix, the
# namespace module resolved for it, and that namespace's file/chunk models.
attr_accessor :prefix, :namespace, :file_model, :chunk_model
##
#
# Binds this instance to the namespace (and its file/chunk models) for a
# collection prefix.
#
# options - Hash; only :prefix is read. When absent the prefix is 'fs'.
#
# The options are symbolized into a copy (to_options rather than
# to_options!) so the hash the caller passed in is never modified.
def initialize(options = {})
  options = options.to_options
  @prefix = options[:prefix] || 'fs'
  @namespace = GridFS.namespace_for(@prefix)
  @file_model = @namespace.file_model
  @chunk_model = @file_model.chunk_model
end
##
#
# Returns the namespace module registered under GridFS for +prefix+, building
# it on first use.
#
# prefix - String or Symbol; it is downcased and camelized to get the
#          constant name (e.g. :fs -> GridFS::Fs).
#
# The lookup passes inherit=false so only constants defined directly on
# GridFS are considered; without it a prefix such as "kernel" would resolve
# to the unrelated top-level ::Kernel. The downcased prefix (not the
# camelized constant name) is handed to build_namespace_for, so the constant
# it registers is the same one this method looks up next time, even for
# prefixes containing underscores.
def GridFS.namespace_for(prefix)
  prefix = prefix.to_s.downcase
  const = prefix.camelize
  if const_defined?(const, false)
    const_get(const, false)
  else
    build_namespace_for(prefix)
  end
end
##
#
# Creates the namespace module for +prefix+, registers it as a constant on
# GridFS, builds its File and Chunk models, cross-links all three, and
# returns the module.
#
# prefix - String or Symbol; downcased, then camelized for the constant name.
def GridFS.build_namespace_for(prefix)
  prefix = prefix.to_s.downcase
  const_name = prefix.camelize

  mod = Module.new{ module_eval(&NamespaceMixin) }

  # the module must be registered before the models are built: the model
  # builders read namespace.name
  const_set(const_name, mod)

  files = build_file_model_for(mod)
  chunks = build_chunk_model_for(mod)

  files.namespace = mod
  files.chunk_model = chunks
  chunks.namespace = mod
  chunks.file_model = files

  mod.prefix = prefix
  mod.file_model = files
  mod.chunk_model = chunks
  mod.send(:const_set, :File, files)
  mod.send(:const_set, :Chunk, chunks)

  # index creation is deferred to process exit and is best-effort
  at_exit{ files.create_indexes rescue nil }
  at_exit{ chunks.create_indexes rescue nil }

  const_get(const_name)
end
# Body that GridFS.build_namespace_for module_eval's into each namespace
# module. It gives the module its prefix/model accessors and the
# put/get/delete entry points.
NamespaceMixin = proc do
  class << self
    attr_accessor :prefix
    attr_accessor :file_model
    attr_accessor :chunk_model

    # The namespace prints as its prefix.
    def to_s
      prefix
    end

    def namespace
      prefix
    end

    # Stores the content of +arg+ as a new file document plus its chunks.
    #
    # arg        - anything GridFS.read accepts: an object responding to
    #              #read, or something whose #to_s is a path.
    # attributes - Hash of file attributes. :id, :content_type and
    #              :upload_date are accepted as aliases for the document
    #              id, :contentType and :uploadDate. :filename, :contentType,
    #              :length, :uploadDate and :md5 are filled in only when the
    #              caller did not supply them.
    #
    # Returns the saved file document. If anything raises before the file
    # document is saved, the chunks written so far are destroyed and the
    # exception propagates.
    def put(arg, attributes = {})
      chunks = []
      stored = false
      file = file_model.new

      # symbolize into a copy: keys are deleted/added below and the caller's
      # hash must not be touched
      attributes = attributes.to_options

      if attributes.has_key?(:id)
        file.id = attributes.delete(:id)
      end
      if attributes.has_key?(:content_type)
        attributes[:contentType] = attributes.delete(:content_type)
      end
      if attributes.has_key?(:upload_date)
        attributes[:uploadDate] = attributes.delete(:upload_date)
      end

      md5 = Digest::MD5.new
      length = 0
      chunkSize = file.chunkSize
      n = 0

      GridFS.read(arg) do |io|
        filename = [file.id.to_s, GridFS.extract_basename(io)].join('/').squeeze('/')
        content_type = GridFS.extract_content_type(filename) || file.contentType
        attributes[:filename] ||= filename
        attributes[:contentType] ||= content_type

        while((buf = io.read(chunkSize)))
          md5 << buf
          # count bytes, not characters
          length += buf.bytesize
          chunk = file.chunks.build
          chunk.data = binary_for(buf)
          chunk.n = n
          n += 1
          chunk.save!
          chunks.push(chunk)
        end
      end

      attributes[:length] ||= length
      attributes[:uploadDate] ||= Time.now.utc
      attributes[:md5] ||= md5.hexdigest

      file.update_attributes(attributes)
      file.save!
      stored = true
      file
    ensure
      # Roll back on failure only. An explicit flag is used instead of $!,
      # because $! is also set when put succeeds while the caller is inside
      # a rescue block, which would wipe the chunks of a good upload.
      # Cleanup stays best-effort per chunk.
      chunks.each{|chunk| chunk.destroy rescue nil} unless stored
    end

    # Wraps a raw string in the driver's binary type; the constructor
    # signature differs depending on whether Moped is loaded.
    # NOTE(review): both variants pass an array of byte values - confirm that
    # is what the installed BSON::Binary expects.
    if defined?(Moped)
      def binary_for(buf)
        BSON::Binary.new(:generic, buf.bytes.to_a)
      end
    else
      def binary_for(buf)
        BSON::Binary.new(buf.bytes.to_a)
      end
    end

    # Looks up a file document by id via file_model.find.
    def get(id)
      file_model.find(id)
    end

    # Finds the file document by id and destroys it.
    def delete(id)
      file_model.find(id).destroy
    end
  end
end
##
#
# Builds the anonymous Mongoid document class stored in "<prefix>.files".
#
# namespace - the namespace module; it must already be assigned to a constant
#             because its name supplies both the collection prefix and the
#             model names.
#
# Returns the new class. The caller is responsible for setting its
# namespace / chunk_model accessors.
def GridFS.build_file_model_for(namespace)
  prefix = namespace.name.split(/::/).last.downcase
  file_model_name = "#{ namespace.name }::File"
  chunk_model_name = "#{ namespace.name }::Chunk"

  Class.new do
    include Mongoid::Document

    # the class is anonymous at this point, so report the intended name
    singleton_class = class << self; self; end
    singleton_class.instance_eval do
      define_method(:name){ file_model_name }
      attr_accessor :chunk_model
      attr_accessor :namespace
    end

    self.collection_name = "#{ prefix }.files"

    field(:filename, :type => String)
    field(:contentType, :type => String, :default => 'application/octet-stream')
    field(:length, :type => Integer, :default => 0)

    # 256 KiB, the default chunk size of the GridFS specification. The
    # previous value, 256 * (2 ** 20), is 256 MiB: a single chunk of that
    # size exceeds MongoDB's per-document limit.
    field(:chunkSize, :type => Integer, :default => (256 * (2 ** 10)))

    # The default is a lambda so it is evaluated per document; a bare
    # Time.now.utc here is evaluated once, when the class is built.
    # NOTE(review): :type => Date is kept as-is; it presumably drops the
    # time of day - confirm whether Time was intended.
    field(:uploadDate, :type => Date, :default => lambda{ Time.now.utc })

    field(:md5, :type => String, :default => Digest::MD5.hexdigest(''))

    # NOTE(review): :inverse_of => :files does not match the chunk model's
    # belongs_to :file; presumably it is what makes Mongoid use the files_id
    # foreign key - confirm before changing.
    has_many(:chunks, :class_name => chunk_model_name, :inverse_of => :files, :dependent => :destroy, :order => [:n, :asc])

    index([[:filename, 1]], :unique => true)

    def basename
      ::File.basename(filename)
    end

    def prefix
      self.class.namespace.prefix
    end

    # Yields each chunk's data as a string, in ascending chunk order.
    def each(&block)
      chunks.all.order_by([:n, :asc]).each do |chunk|
        block.call(chunk.data.to_s)
      end
    end

    # Concatenates every chunk into one string.
    def to_s
      to_s = ''
      each{|data| to_s << data}
      to_s
    end

    # With a block: yields each chunk's data and returns the length field.
    # Without a block: returns an array built from the chunk data.
    # NOTE(review): the array branch splats each data string, so it likely
    # collects whole chunk strings rather than byte values - confirm intent.
    def bytes(&block)
      if block
        each{|data| block.call(data)}
        length
      else
        bytes = []
        each{|data| bytes.push(*data)}
        bytes
      end
    end

    # No-op that lets a file document stand in where an IO-like #close is
    # expected.
    def close
      self
    end

    def content_type
      contentType
    end

    def upload_date
      uploadDate
    end

    # Kept for backward compatibility. These two used to call updateDate,
    # which is not a declared field; they now read uploadDate.
    def update_date
      uploadDate
    end

    def created_at
      uploadDate
    end

    def namespace
      self.class.namespace
    end
  end
end
##
#
# Builds the anonymous Mongoid document class stored in "<prefix>.chunks".
#
# namespace - the namespace module; it must already be assigned to a constant
#             because its name supplies both the collection prefix and the
#             model names.
#
# Returns the new class. The caller is responsible for setting its
# namespace / file_model accessors.
def GridFS.build_chunk_model_for(namespace)
  prefix = namespace.name.split(/::/).last.downcase
  file_model_name = "#{ namespace.name }::File"
  chunk_model_name = "#{ namespace.name }::Chunk"

  Class.new do
    include Mongoid::Document

    # the class is anonymous at this point, so report the intended name
    singleton_class = class << self; self; end
    singleton_class.instance_eval do
      define_method(:name){ chunk_model_name }
      attr_accessor :file_model
      attr_accessor :namespace
    end

    self.collection_name = "#{ prefix }.chunks"

    field(:n, :type => Integer, :default => 0)
    field(:data, :type => Binary)

    belongs_to(:file, :foreign_key => :files_id, :class_name => file_model_name)

    # Compound unique index on (files_id, n), one [field, direction] pair per
    # key as the GridFS specification describes. The previous spec,
    # [[:files_id, 1, :n, -1]], put all four values in a single pair.
    # NOTE(review): a database that already ran the old spec may still carry
    # the index it created - check and drop it if it is unique on files_id
    # alone.
    index([[:files_id, 1], [:n, 1]], :unique => true)

    def namespace
      self.class.namespace
    end
  end
end
##
#
# Build the default namespace and expose its models directly on GridFS.
GridFS.build_namespace_for(:Fs)
File = Fs.file_model
Chunk = Fs.chunk_model

# GridFS.put / GridFS.get / GridFS.delete forward to the default namespace.
%w( put get delete ).each do |method|
  define_singleton_method(method) do |*args, &block|
    ::GridFS::Fs.public_send(method, *args, &block)
  end
end
##
#
# Yields a readable IO for +arg+ to the block.
#
# arg - either an object responding to #read (yielded via GridFS.rewind), or
#       any other object, whose #to_s is opened as a path.
#
# Raises ArgumentError when the path form starts with "|": Kernel#open would
# treat such a string as a command to run, and a storage call must never
# spawn a subprocess from caller-supplied input.
def GridFS.read(arg, &block)
  if arg.respond_to?(:read)
    rewind(arg) do |io|
      block.call(io)
    end
  else
    path = arg.to_s
    if path.start_with?('|')
      raise ArgumentError, "refusing to open a command pipe: #{ path.inspect }"
    end
    # binary mode: the content is chunked and hashed as raw bytes
    open(path, 'rb') do |io|
      block.call(io)
    end
  end
end
# Calls the block with +io+ positioned at its start, then puts the position
# back to where it was. Every positioning step is best-effort: errors from
# pos / flush / rewind / pos= are swallowed, so objects that support only
# part of that interface still work.
def GridFS.rewind(io, &block)
begin
# remember where the caller was; pos stays nil if io.pos itself raises
pos = io.pos
io.flush
io.rewind
rescue
nil
end
begin
block.call(io)
ensure
begin
# restore the saved position; this raises (and is ignored) when pos is nil
io.pos = pos
rescue
nil
end
end
end
# Derives a sanitized basename for +object+ from the first of
# original_path, original_filename, path, filename, pathname that it
# responds to. Returns nil when it responds to none of them, or when the
# first one it does respond to returns nil/false.
def GridFS.extract_basename(object)
  probes = [:original_path, :original_filename, :path, :filename, :pathname]
  msg = probes.detect{|candidate| object.respond_to?(candidate)}
  raw = object.send(msg) if msg
  raw ? cleanname(raw) : nil
end
# Returns, as a String, the first MIME type that MIME::Types reports for the
# basename of +filename+, or nil when it reports none.
def GridFS.extract_content_type(filename)
  basename = ::File.basename(filename.to_s)
  mime = MIME::Types.type_for(basename).first
  mime ? mime.to_s : nil
end
# Reduces +pathname+ to a storage-safe basename: take the basename,
# CGI-unescape it, replace every character outside 0-9 a-z A-Z _ @ ) ( ~ . -
# with an underscore, and collapse runs of underscores into one.
def GridFS.cleanname(pathname)
  unescaped = CGI.unescape(::File.basename(pathname.to_s))
  sanitized = unescaped.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_')
  sanitized.squeeze('_')
end
end
require 'test_helper'
##
# see: http://www.mongodb.org/display/DOCS/GridFS+Specification
#
class GridFSTest < ActiveSupport::TestCase
##
#
# The file model must be instantiable and declare every field the GridFS
# files collection uses.
test "files have the required fields" do
  assert{ GridFS::File.new }

  required = %w( _id length chunkSize uploadDate md5 )
  optional = %w( filename contentType )

  declared = assert{ GridFS::File.fields.keys.map(&:to_s) }

  (required + optional).each do |name|
    assert{ declared.include?(name) }
  end
end
##
#
# The chunk model must declare every field the GridFS chunks collection
# uses.
test "chunks have the required fields" do
  required = %w( _id n data )
  optional = %w( )

  declared = assert{ GridFS::Chunk.fields.keys.map(&:to_s) }

  (required + optional).each do |name|
    assert{ declared.include?(name) }
  end
end
##
#
# Round-trips this test file through GridFS.put / get / delete and checks
# that deleting the file also removes its chunks.
test "that a simple interface exists" do
  path = __FILE__
  buf = IO.read(path)

  # block form so the handle is closed; the bare open(path) used before
  # leaked it
  file = assert{ open(path){|io| GridFS.put(io) } }

  assert{ ::File.basename(file.filename) == ::File.basename(path) }
  # compare in bytes rather than characters
  assert{ file.length == buf.bytesize }
  assert{ file.to_s == buf }
  assert{ GridFS.get(file.id).to_s == file.to_s }

  chunks = assert{ file.chunks }
  chunks.each do |chunk|
    assert{ GridFS::Chunk.find(chunk.id) }
  end

  assert{ GridFS.delete(file.id); true }
  assert_raises(subclass_of(Exception)){ GridFS::File.find(file.id) }
  chunks.each do |chunk|
    assert_raises(subclass_of(Exception)){ GridFS::Chunk.find(chunk.id) }
  end
end
##
#
# Reassembling the pieces yielded by #each must reproduce the stored
# content.
test "that files can be streamed using #each" do
  path = __FILE__
  open(path) do |io|
    file = assert{ GridFS.put(io) }
    streamed = ''
    file.each{|piece| streamed << piece}
    assert{ streamed == IO.read(path) }
  end
end
##
#
# Stores this test file with GridFS.put, fetches it with the mongofiles CLI,
# deletes it, re-uploads it with mongofiles, and reads it back through the
# file model.
test "that it's compatiable with the 'mongofiles' cli tool" do
  path = __FILE__
  file = assert{ open(path){|io| GridFS.put(io) } }

  tmpdir do
    filename = file.filename
    dirname = File.dirname(filename)
    # the stored filename has a directory component, so mongofiles needs
    # that directory to exist under the tmpdir
    FileUtils.mkdir_p(dirname) unless File.directory?(dirname)

    db = assert{ Mongoid.database.name }

    `mongofiles --db #{ db.inspect } get #{ filename.inspect }`
    assert{ IO.read(filename) == file.to_s }

    # block parameter is named differently from the outer `file` so it does
    # not shadow it
    assert{ GridFS::File.where(:filename => filename).all.each{|stored| stored.destroy}; true }
    assert{ GridFS::File.where(:filename => filename).first.nil? }

    `mongofiles --db #{ db.inspect } put #{ filename.inspect }`
    file = assert{ GridFS::File.where(:filename => filename).first }
    assert{ IO.read(filename) == file.to_s }
  end
end
end
@cookrn
Copy link

cookrn commented Apr 17, 2012

Are you missing asserts in grid_fs_test.rb on #L30 and #L53?

@ahoward
Copy link
Author

ahoward commented Apr 17, 2012

oh - so i am. thx.

@adkron
Copy link

adkron commented Jun 22, 2012

Are you going to make a gem out of this?

@ahoward
Copy link
Author

ahoward commented Jun 22, 2012

sure. you need one?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment