Skip to content

Instantly share code, notes, and snippets.

@yfeldblum
Forked from ahoward/grid_fs.rb
Created July 15, 2012 16:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yfeldblum/3117686 to your computer and use it in GitHub Desktop.
Save yfeldblum/3117686 to your computer and use it in GitHub Desktop.
Implementation of GridFS on top of Mongoid.
require "mime/types"
require "digest/md5"
require "cgi"
### see: http://www.mongodb.org/display/DOCS/GridFS+Specification
class GridFS
##
#
# Bucket prefix this instance was created with ('fs' unless overridden).
attr_accessor :prefix
# Namespace module returned by GridFS.namespace_for for the prefix.
attr_accessor :namespace
# File model class taken from the namespace.
attr_accessor :file_model
# Chunk model class taken from the file model.
attr_accessor :chunk_model
##
#
# Builds a handle onto the bucket named by options[:prefix].
#
# options - Hash of settings; keys may be Strings or Symbols.
#           :prefix - bucket prefix (default: 'fs').
#
# The namespace is resolved through GridFS.namespace_for; the file model is
# read from the namespace and the chunk model from the file model.
def initialize(options = {})
  # Symbolize keys on a copy so the caller's hash is left untouched.
  options = options.to_options
  @prefix = options[:prefix] || 'fs'
  @namespace = GridFS.namespace_for(@prefix)
  @file_model = @namespace.file_model
  @chunk_model = @file_model.chunk_model
end
##
#
# Returns the namespace module for +prefix+, building it on first use.
#
# prefix - String or Symbol bucket prefix; it is downcased and camelized to
#          form the constant name looked up under GridFS.
#
# Returns the constant found under GridFS, or the result of
# GridFS.build_namespace_for when no such constant exists yet.
def GridFS.namespace_for(prefix)
  name = prefix.to_s.downcase.camelize

  # inherit = false: only constants defined directly on GridFS count. With the
  # default (inherited) lookup a prefix such as "string" or "data" would
  # resolve to a top-level Ruby constant instead of a bucket namespace.
  #
  # NOTE(review): build_namespace_for downcases and camelizes its argument
  # again, so a prefix containing underscores may end up registered under a
  # different constant name than the one looked up here - confirm.
  const_defined?(name, false) ? const_get(name, false) : build_namespace_for(name)
end
##
#
# Creates the namespace module for +prefix+, registers it as a constant on
# GridFS, and wires up its file and chunk models.
#
# prefix - String or Symbol; downcased for the namespace's prefix and
#          camelized for the constant name.
#
# Returns the namespace module (read back through the new constant).
def GridFS.build_namespace_for(prefix)
  prefix = prefix.to_s.downcase
  const_name = prefix.camelize

  mod = Module.new { module_eval(&NamespaceMixin) }

  # The constant has to exist before the models are built: both builders
  # derive collection and class names from the module's name.
  const_set(const_name, mod)

  files = build_file_model_for(mod)
  chunks = build_chunk_model_for(mod)

  # Cross-link the namespace and the two models.
  [files, chunks].each { |model| model.namespace = mod }
  files.chunk_model = chunks
  chunks.file_model = files

  mod.prefix = prefix
  mod.file_model = files
  mod.chunk_model = chunks
  mod.send(:const_set, :File, files)
  mod.send(:const_set, :Chunk, chunks)

  # Index creation is attempted at process exit; failures are ignored.
  at_exit { files.create_indexes rescue nil }
  at_exit { chunks.create_indexes rescue nil }

  const_get(const_name)
end
# Body evaluated inside each bucket namespace module (see
# GridFS.build_namespace_for). It gives the module itself accessors for its
# prefix and models plus the put/get/delete storage interface.
NamespaceMixin = proc do
  class << self
    attr_accessor :prefix
    attr_accessor :file_model
    attr_accessor :chunk_model

    # The namespace prints as its prefix.
    def to_s
      prefix
    end

    # Returns the prefix (not the module).
    def namespace
      prefix
    end

    # Stores the content of +arg+ as a new file plus its chunks.
    #
    # arg        - anything GridFS.read accepts: an object responding to
    #              #read, or something whose #to_s can be opened.
    # attributes - Hash of file attributes (String or Symbol keys).
    #              :id overrides the generated file id; :content_type and
    #              :upload_date are accepted as aliases for :contentType and
    #              :uploadDate. :filename, :contentType, :length,
    #              :uploadDate and :md5 are filled in only when absent.
    #
    # Returns the saved file document.
    # Raises whatever chunk.save! / file.save! / the read raise; in that case
    # the chunks already written by this call are destroyed (best effort).
    def put(arg, attributes = {})
      chunks = []
      stored = false
      file = file_model.new

      # Work on a symbolized copy so the caller's hash is not rewritten.
      attributes = attributes.to_options

      file.id = attributes.delete(:id) if attributes.has_key?(:id)

      { :content_type => :contentType, :upload_date => :uploadDate }.each do |snake, camel|
        attributes[camel] = attributes.delete(snake) if attributes.has_key?(snake)
      end

      digest = Digest::MD5.new
      length = 0
      chunk_size = file.chunkSize
      n = 0

      GridFS.read(arg) do |io|
        filename = [file.id.to_s, GridFS.extract_basename(io)].join('/').squeeze('/')
        content_type = GridFS.extract_content_type(filename) || file.contentType
        attributes[:filename] ||= filename
        attributes[:contentType] ||= content_type

        while (buf = io.read(chunk_size))
          digest << buf
          # Length is a byte count, so use bytesize rather than the character count.
          length += buf.bytesize
          chunk = file.chunks.build
          chunk.data = binary_for(buf)
          chunk.n = n
          n += 1
          chunk.save!
          chunks.push(chunk)
        end
      end

      attributes[:length] ||= length
      attributes[:uploadDate] ||= Time.now.utc
      attributes[:md5] ||= digest.hexdigest
      file.update_attributes(attributes)
      file.save!

      stored = true
      file
    ensure
      # An explicit flag is used instead of $!: $! is also set when put is
      # called from inside a caller's rescue clause, which would wrongly wipe
      # the chunks of a file that was stored successfully.
      unless stored
        chunks.each do |chunk|
          begin
            chunk.destroy
          rescue StandardError
            nil # best-effort cleanup; the original error is the one to surface
          end
        end
      end
    end

    # Wraps a raw string in a BSON binary. The constructor arguments differ
    # depending on whether Moped is loaded.
    if defined?(Moped)
      def binary_for(buf)
        BSON::Binary.new(:generic, buf.bytes.to_a)
      end
    else
      def binary_for(buf)
        BSON::Binary.new(buf.bytes.to_a)
      end
    end

    # Looks a file up by id via the file model's find.
    def get(id)
      file_model.find(id)
    end

    # Finds the file by id and destroys it.
    def delete(id)
      file_model.find(id).destroy
    end
  end
end
##
#
# Builds the anonymous Mongoid document class that models the
# "<prefix>.files" collection for +namespace+.
#
# namespace - a named Module; the last segment of its name, downcased, is the
#             collection prefix, and its full name is used to derive the
#             File/Chunk class names.
#
# Returns the new file model class.
def GridFS.build_file_model_for(namespace)
  prefix = namespace.name.split(/::/).last.downcase
  file_model_name = "#{ namespace.name }::File"
  chunk_model_name = "#{ namespace.name }::Chunk"

  Class.new do
    include Mongoid::Document

    # The class is anonymous, so report the name it will be registered under
    # and expose class-level links to the chunk model and namespace.
    metaclass = class << self; self; end
    metaclass.instance_eval do
      define_method(:name){ file_model_name }
      attr_accessor :chunk_model
      attr_accessor :namespace
    end

    self.collection_name = "#{ prefix }.files"

    field(:filename, :type => String)
    field(:contentType, :type => String, :default => 'application/octet-stream')
    field(:length, :type => Integer, :default => 0)
    # 256 KiB, the GridFS specification's default. The previous 256 MiB
    # default is larger than MongoDB's maximum document size, so any chunk of
    # a sufficiently large file could not be saved.
    field(:chunkSize, :type => Integer, :default => (256 * (2 ** 10)))
    # Time rather than Date so the time of day is kept, and a lambda so the
    # default is computed per document instead of once when the class is built.
    field(:uploadDate, :type => Time, :default => lambda { Time.now.utc })
    field(:md5, :type => String, :default => Digest::MD5.hexdigest(''))

    has_many(:chunks, :class_name => chunk_model_name, :inverse_of => :files, :dependent => :destroy, :order => [:n, :asc])

    index([[:filename, 1]], :unique => true)

    # Last path component of the stored filename.
    def basename
      ::File.basename(filename)
    end

    # Prefix of the namespace this model belongs to.
    def prefix
      self.class.namespace.prefix
    end

    # Yields the data of every chunk, as a string, in ascending +n+ order.
    def each(&block)
      chunks.all.order_by([:n, :asc]).each do |chunk|
        block.call(chunk.data.to_s)
      end
    end

    # Concatenates all chunk data into one string.
    def to_s
      buffer = ''
      each{|data| buffer << data}
      buffer
    end

    # With a block: yields each chunk's data and returns +length+.
    # Without a block: returns an array collected from the chunk data.
    def bytes(&block)
      if block
        each{|data| block.call(data)}
        length
      else
        collected = []
        each{|data| collected.push(*data)}
        collected
      end
    end

    # No-op that returns self.
    def close
      self
    end

    # snake_case alias for contentType.
    def content_type
      contentType
    end

    # Returns the upload timestamp. (Previously called the nonexistent
    # +updateDate+; the stored field is +uploadDate+.)
    def update_date
      uploadDate
    end

    # Returns the upload timestamp (see update_date).
    def created_at
      uploadDate
    end

    # Namespace module this model was built for.
    def namespace
      self.class.namespace
    end
  end
end
##
#
# Builds the anonymous Mongoid document class that models the
# "<prefix>.chunks" collection for +namespace+.
#
# namespace - a named Module; the last segment of its name, downcased, is the
#             collection prefix, and its full name is used to derive the
#             File/Chunk class names.
#
# Returns the new chunk model class.
def GridFS.build_chunk_model_for(namespace)
  prefix = namespace.name.split(/::/).last.downcase
  file_model_name = "#{ namespace.name }::File"
  chunk_model_name = "#{ namespace.name }::Chunk"

  Class.new do
    include Mongoid::Document

    # The class is anonymous, so report the name it will be registered under
    # and expose class-level links to the file model and namespace.
    metaclass = class << self; self; end
    metaclass.instance_eval do
      define_method(:name){ chunk_model_name }
      attr_accessor :file_model
      attr_accessor :namespace
    end

    self.collection_name = "#{ prefix }.chunks"

    field(:n, :type => Integer, :default => 0)
    field(:data, :type => Binary)

    belongs_to(:file, :foreign_key => :files_id, :class_name => file_model_name)

    # Compound unique index: one [key, direction] pair per field. The former
    # single four-element entry was not a valid pair list.
    index([[:files_id, 1], [:n, -1]], :unique => true)

    # Namespace module this model was built for.
    def namespace
      self.class.namespace
    end
  end
end
##
#
# Build the default namespace and expose its models directly on GridFS.
GridFS.build_namespace_for(:Fs)
File = Fs.file_model
Chunk = Fs.chunk_model

# Class-level shortcuts that forward to the default namespace.
def GridFS.put(*args, &block)
  ::GridFS::Fs.put(*args, &block)
end

def GridFS.get(*args, &block)
  ::GridFS::Fs.get(*args, &block)
end

def GridFS.delete(*args, &block)
  ::GridFS::Fs.delete(*args, &block)
end
##
#
# Yields a readable IO for +arg+ to the block.
#
# arg - an object responding to #read (handed to GridFS.rewind), or anything
#       else, whose #to_s is passed to +open+.
#
# Returns the block's result.
#
# NOTE(review): Kernel#open interprets a string starting with "|" as a
# command to run; confirm callers never pass untrusted strings here.
def GridFS.read(arg, &block)
  return rewind(arg) { |io| block.call(io) } if arg.respond_to?(:read)

  open(arg.to_s) { |io| block.call(io) }
end
# Calls the block with +io+ positioned at its start, then tries to put the
# position back where it was.
#
# io - the stream to hand to the block. Errors raised while recording the
#      position, flushing, rewinding or restoring are swallowed.
#
# Returns the block's result.
def GridFS.rewind(io, &block)
  saved_pos = nil

  begin
    saved_pos = io.pos
    io.flush
    io.rewind
  rescue StandardError
    nil # carry on with the stream wherever it currently is
  end

  begin
    block.call(io)
  ensure
    begin
      io.pos = saved_pos
    rescue StandardError
      nil # restoring the position is best effort
    end
  end
end
# Derives a cleaned file name from +object+.
#
# The first of original_path, original_filename, path, filename, pathname
# that +object+ responds to is called; no later one is tried, even if the
# result is nil.
#
# Returns the result passed through GridFS.cleanname, or nil when nothing
# usable was found.
def GridFS.extract_basename(object)
  candidates = [:original_path, :original_filename, :path, :filename, :pathname]
  accessor = candidates.find { |msg| object.respond_to?(msg) }
  raw_name = accessor ? object.send(accessor) : nil
  raw_name ? cleanname(raw_name) : nil
end
# Looks up a content type for +filename+ from the basename of its string
# form, using MIME::Types.
#
# Returns the first match as a String, or nil when there is none.
def GridFS.extract_content_type(filename)
  basename = ::File.basename(filename.to_s)
  first_match = MIME::Types.type_for(basename).first
  first_match ? first_match.to_s : nil
end
# Reduces +pathname+ to a sanitized base name.
#
# The basename is CGI-unescaped, every character outside
# 0-9 a-z A-Z _ @ ) ( ~ . - becomes "_", and runs of "_" collapse to one.
#
# Returns the cleaned String.
def GridFS.cleanname(pathname)
  unescaped = CGI.unescape(::File.basename(pathname.to_s))
  sanitized = unescaped.gsub(%r/[^0-9a-zA-Z_@)(~.-]/, '_')
  sanitized.gsub(%r/_+/,'_')
end
end
require 'test_helper'
##
# see: http://www.mongodb.org/display/DOCS/GridFS+Specification
#
# Exercises the default GridFS namespace: model fields, the put/get/delete
# interface, streaming via #each, and round-tripping with the 'mongofiles'
# command line tool. Relies on helpers from test_helper (block-style assert,
# subclass_of, tmpdir).
class GridFSTest < ActiveSupport::TestCase
  ##
  #
  test "files have the required fields" do
    assert{ GridFS::File.new }

    required_fields = %w(
      _id
      length
      chunkSize
      uploadDate
      md5
    )

    optional_fields = %w(
      filename
      contentType
    )

    fields = assert{ GridFS::File.fields.keys.map(&:to_s) }

    (required_fields + optional_fields).each do |field|
      assert{ fields.include?(field) }
    end
  end

  ##
  #
  test "chunks have the required fields" do
    required_fields = %w(
      _id
      n
      data
    )

    optional_fields = %w(
    )

    fields = assert{ GridFS::Chunk.fields.keys.map(&:to_s) }

    (required_fields + optional_fields).each do |field|
      assert{ fields.include?(field) }
    end
  end

  ##
  #
  test "that a simple interface exists" do
    path = __FILE__
    buf = IO.read(path)

    # Block form of open so the handle is closed once the file is stored.
    file = assert{ open(path){|io| GridFS.put(io) } }

    assert{ ::File.basename(file.filename) == ::File.basename(path) }
    assert{ file.length == buf.size }
    assert{ file.to_s == buf }
    assert{ GridFS.get(file.id).to_s == file.to_s }

    chunks = assert{ file.chunks }
    chunks.each do |chunk|
      assert{ GridFS::Chunk.find(chunk.id) }
    end

    # Deleting the file must take its chunks with it.
    assert{ GridFS.delete(file.id); true }
    assert_raises(subclass_of(Exception)){ GridFS::File.find(file.id) }
    chunks.each do |chunk|
      assert_raises(subclass_of(Exception)){ GridFS::Chunk.find(chunk.id) }
    end
  end

  ##
  #
  test "that files can be streamed using #each" do
    path = __FILE__

    open(path) do |io|
      file = assert{ GridFS.put(io) }
      buf = ''
      file.each{|data| buf << data}
      assert{ buf == IO.read(path) }
    end
  end

  ##
  #
  test "that it's compatiable with the 'mongofiles' cli tool" do
    path = __FILE__
    file = assert{ open(path){|io| GridFS.put(io) } }

    tmpdir do
      filename = file.filename
      dirname = File.dirname(filename)
      FileUtils.mkdir_p(dirname) unless test(?d, dirname)
      db = assert{ Mongoid.database.name }

      # Fetch with mongofiles what GridFS.put stored.
      `mongofiles --db #{ db.inspect } get #{ filename.inspect }`
      assert{ IO.read(filename) == file.to_s }

      assert{ GridFS::File.where(:filename => filename).all.each{|stored| stored.destroy}; true }
      assert{ GridFS::File.where(:filename => filename).first.nil? }

      # Store with mongofiles and read it back through the models.
      `mongofiles --db #{ db.inspect } put #{ filename.inspect }`
      file = assert{ GridFS::File.where(:filename => filename).first }
      assert{ IO.read(filename) == file.to_s }
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment