Skip to content

Instantly share code, notes, and snippets.

@no-reply
Created August 5, 2013 19:16
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save no-reply/6158615 to your computer and use it in GitHub Desktop.
Save no-reply/6158615 to your computer and use it in GitHub Desktop.
Blobs of bagit-for-hydra stuff
module Hybag
  # Error raised when a bag cannot be imported; Hybag.ingest raises it for
  # bags that are incomplete, inconsistent or invalid.
  class BagImportError < StandardError; end
end
require 'bagit'
module Hybag
  # Mixin that exports an object's datastreams as a BagIt bag on disk.
  #
  # The including class must respond to +pid+, +datastreams+ (iterated as
  # label/datastream pairs) and +metadata_streams+; all three are called below.
  module Baggable
    # Writes a fresh bag for this object, replacing any bag already present at
    # the same location.
    #
    # Datastreams with nil content are skipped. Datastreams listed in
    # +bag_tags+ become tag files, those in +bag_fedora_tags+ become tag files
    # under "fedora/", and everything else becomes a payload file. Both
    # manifests are generated before returning.
    #
    # path   - String sub-directory handed to +bag_dir+ (default: '').
    #
    # Returns the BagIt::Bag that was written.
    # Raises Exception when pid is '__DO_NOT_USE__'.
    # NOTE(review): presumably that pid marks an unsaved object - confirm.
    def write_bag(path = '')
      # Exception (rather than a StandardError subclass) is kept so existing
      # callers see the same class; only a message was added.
      raise Exception, "Cannot write a bag for pid '__DO_NOT_USE__'" if pid == '__DO_NOT_USE__'

      # delete any existing bag before making a new one
      delete_bag(path)
      target = bag_dir(path)
      FileUtils.mkdir_p target unless File.directory? target
      bag = BagIt::Bag.new(target)

      # The tag lists do not change while iterating, so look them up once.
      tags = bag_tags
      fedora_tags = bag_fedora_tags.values

      #TODO: Writing to bag files is naive; reads the content out and writes it.
      # Possibly there is a better way to do this.
      datastreams.each do |label, ds|
        content = ds.content
        next if content.nil?

        filename = label + mime_extension(ds)
        if tags.include? ds
          bag.add_tag_file(filename) { |f| f.puts content }
        elsif fedora_tags.include? ds
          bag.add_tag_file('fedora/' + filename) { |f| f.puts content }
        else
          # write, not puts: puts would append a newline to payloads that do
          # not already end in one. The UTF-8 relabel is applied to a copy so
          # the datastream's own content string keeps its encoding.
          # NOTE(review): the relabel presumably avoids a transcoding error
          # when binary content is written - confirm.
          bag.add_file(filename) { |f| f.write content.dup.force_encoding('UTF-8') }
        end
      end

      bag.tagmanifest!
      bag.manifest!
      bag
    end

    # Enqueues an Exporter job with Resque, passing this object's pid and
    # the given path.
    def queue_bag_export(path = '')
      Resque.enqueue(Exporter, pid, path)
    end

    # Removes this object's bag directory (as computed by +bag_dir+) if it
    # exists.
    def delete_bag(path = '')
      bag_path = bag_dir(path)
      FileUtils.rm_rf bag_path if File.directory? bag_path
    end

    private

    # Builds the bag directory for this object: the given path is placed under
    # Rails.root/tmp/bags unless it already starts with that prefix, and the
    # pid (passed through String#safe_filename) is appended.
    def bag_dir(path)
      # TODO: make bag directory configurable?
      base = Rails.root.join("tmp/bags")
      path = Rails.root.join("tmp/bags", path) unless path.to_s.start_with?(base.to_s)
      File.join(path, pid.safe_filename)
    end

    #TODO: allow selection of specific content datastreams to bag
    # to ignore thumbnails and other derivatives, for example.
    #
    # Returns the datastreams that are neither tag files nor fedora tag files.
    def bag_contents
      tags = bag_tags
      # bag_fedora_tags is keyed by label, so membership of a datastream has to
      # be tested against its values (Hash#include? only looks at keys).
      fedora_tags = bag_fedora_tags.values
      datastreams.reject { |_label, ds| tags.include?(ds) || fedora_tags.include?(ds) }
    end

    # Returns all non-fedora tag datastreams (the object's metadata streams).
    def bag_tags
      metadata_streams
    end

    # Returns the fedora tag datastreams: RELS-EXT datastreams and the one
    # whose dsid is "DC".
    def bag_fedora_tags
      datastreams.select do |_label, ds|
        ds.is_a?(ActiveFedora::RelsExtDatastream) || ds.dsid == "DC"
      end
    end

    # Returns the filename extension for a datastream, including the leading
    # dot: ".nt" for N-Triples RDF datastreams, otherwise the first extension
    # registered for the datastream's MIME type. Returns '' when the MIME type
    # is blank or has no known extension, so no dangling "." is appended.
    def mime_extension(ds)
      return '.nt' if ds.kind_of?(ActiveFedora::NtriplesRDFDatastream)

      mime = ds.mimeType.to_s
      return '' if mime.empty?

      type = MIME::Types[mime].first
      ext = type ? type.extensions.first : nil
      ext.to_s.empty? ? '' : ".#{ext}"
    end
  end
end
require 'bagit'
require 'filemagic'
require 'rdf/ntriples'
module Hybag
  # Ingests a bag as a new repository object.
  #
  # The bag must be complete, consistent and valid. The model class and the
  # collection ids come from +get_relations+; collections that do not exist yet
  # are created. Every payload file becomes a file datastream whose label and
  # dsid are the file's basename without extension, then descMetadata is
  # imported from the bag.
  #
  # bag          - bag object responding to bag_dir, bag_files, complete?,
  #                consistent? and valid?.
  # needs_review - when false, item.review! is called after the import
  #                (default: true).
  #
  # Returns the new item.
  # Raises BagImportError if the bag is incomplete, inconsistent or invalid.
  def self.ingest(bag, needs_review=true)
    raise BagImportError, "Bag is incomplete: #{bag.bag_dir}" unless bag.complete?
    raise BagImportError, "Bag is inconsistent." unless bag.consistent?
    raise BagImportError, "Bag is invalid." unless bag.valid?

    model, collections = get_relations(bag)
    collections.each do |coll_pid|
      next if Collection.exists?(coll_pid)
      coll = Collection.new(pid: coll_pid)
      coll.title = OregonDigital::IdService.noidify(coll.pid)
      coll.save
    end

    # NOTE(review): the model name is read from the bag and turned into a class
    # with constantize; restrict it to known models if bags can be untrusted.
    item = model.constantize.new

    # One FileMagic handle serves every file and is closed afterwards.
    magic = FileMagic.new(FileMagic::MAGIC_MIME)
    begin
      bag.bag_files.each do |file|
        label = File.basename(file, '.*')
        opts = {
          :mimeType => magic.file(file).split(';')[0],
          :label => label,
          :dsid => label
        }
        # binread returns the raw bytes as a binary string and closes the file.
        # forcing binary may not work for all content?
        item.add_file_datastream(File.binread(file), opts)
      end
    ensure
      magic.close
    end

    item.descMetadata.set = collections
    # save to generate a pid
    item.save
    import_desc_metadata(item, bag)
    #TODO: add other tag files
    item.review! unless needs_review
    item
  end

  # Determines the model name and collection ids for a bag.
  #
  # Looks for fedora/RELS-EXT.rdf first, then hybag.yml (keys "model" and
  # "collections"), and otherwise falls back to "GenericAsset" with no
  # collections. Keys missing from hybag.yml fall back to the same defaults.
  #
  # Returns [model, collections]: a String and an Array.
  def self.get_relations(bag)
    rels_path = File.join(bag.bag_dir, "fedora", "RELS-EXT.rdf")
    conf_path = File.join(bag.bag_dir, "hybag.yml")

    if File.exist?(rels_path)
      model, collections = item_from_rels(rels_path)
    elsif File.exist?(conf_path)
      # NOTE(review): YAML.load runs on a file supplied by the bag; consider
      # YAML.safe_load if bags can come from untrusted sources.
      bagconf = YAML.load(File.read(conf_path))
      bagconf = {} unless bagconf.is_a?(Hash)
      model = bagconf["model"] || "GenericAsset"
      collections = Array(bagconf["collections"])
    else
      #TODO: Fall back to ingest form if no RELS/config
      model = "GenericAsset"
      collections = []
    end
    return model, collections
  end

  # NOTE(review): a bare `private` does not apply to `def self.` methods, so
  # the helpers below are still publicly callable. Left as-is so existing
  # callers keep working.
  private

  # Writes the bag's descMetadata.nt statements into item.descMetadata and
  # saves the item.
  #
  # Statements whose subject equals the graph's first subject are re-pointed at
  # the item's own rdf_subject; all other statements are appended unchanged.
  #
  # Returns the result of item.save.
  def self.import_desc_metadata(item, bag)
    #TODO: what if descMetadata comes in other formats?
    #TODO: if there is more than one RDF datastream, merge the graph
    graph = RDF::Graph.load(File.join(bag.bag_dir, 'descMetadata.nt'))
    # This assumes that the first subject in the RDF is the bag item
    #TODO: actually figure out which subject to overwrite
    # could do this by trying to find one which is not also
    # an object or predicate
    item_subject = graph.first_subject
    desc = item.descMetadata
    graph.each_statement do |statement|
      # Overwrite the subject if necessary
      subject = statement.subject == item_subject ? desc.rdf_subject : statement.subject
      desc.append(subject, statement.predicate, statement.object)
    end
    item.save
  end

  # Extracts the model and collection associations from a fedora RELS file.
  #
  # The model is the first hasModel object containing the
  # "info:fedora/afmodel:" prefix, with that prefix removed; hasModel objects
  # without the prefix are ignored, and "GenericAsset" is used when none
  # qualifies. Collections are the objects of every isMemberOf statement.
  #
  # Returns [model, collections]: a String and an Array of Strings.
  def self.item_from_rels(file)
    #TODO: Move this method somewhere it can be used by other modules?
    model_predicate = "info:fedora/fedora-system:def/model#hasModel"
    coll_predicate = "info:fedora/fedora-system:def/relations-external#isMemberOf"
    afmodel_prefix = "info:fedora/afmodel:"
    rels_graph = RDF::Graph.load(file)

    model = 'GenericAsset'
    if rels_graph.has_predicate?(model_predicate)
      model_uris = rels_graph.to_a.select { |x| x.predicate == model_predicate }.map { |x| x.object.to_s }
      af_model = model_uris.find { |uri| uri.include?(afmodel_prefix) }
      model = af_model.sub(afmodel_prefix, '') if af_model
    end

    collections = []
    if rels_graph.has_predicate?(coll_predicate)
      collections = rels_graph.to_a.select { |x| x.predicate == coll_predicate }.map { |x| x.object.to_s }
    end

    [model, collections]
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment