Skip to content

Instantly share code, notes, and snippets.

@cbeer
Created March 21, 2012 14:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cbeer/2147407 to your computer and use it in GitHub Desktop.
Save cbeer/2147407 to your computer and use it in GitHub Desktop.
Rubydora large file ingest
# DOESN'T WORK FOR LARGE FILES
# gems/eventmachine-1.0.0.beta.4/lib/em/streamer.rb:88:in `get_chunk': integer 2147483648 too big to convert to `int' (RangeError)
# Experiment: POST a file to the 'content' datastream of Fedora object
# 'test:1' through em-http-request.
#
# Environment variables read: host (repository URL), user, pass, file (path
# of the file to upload).
require 'rubygems'
require 'rubydora'
require 'em-http-request'

Rubydora.repository = Rubydora.connect(:url => ENV['host'], :user => ENV['user'], :password => ENV['pass'], :timeout => 1200)

# Best effort: any failure while creating the test object is ignored
# (presumably because the object may already exist from a previous run).
Rubydora::DigitalObject.create('test:1') rescue nil

# Compute the datastream URL once and reuse it for the log line and the request.
content_path = Rubydora.repository.datastream_url('test:1', 'content')
print content_path

EventMachine.run {
  upload = EventMachine::HttpRequest.new(ENV['host']).post :path => content_path, :file => ENV['file']

  # EventMachine.run only returns after the reactor has been stopped. Without
  # these handlers the script stays in the event loop forever once the request
  # has completed or failed, and the `exit` below is never reached.
  upload.callback {
    puts "\nUpload finished with HTTP status #{upload.response_header.status}"
    EventMachine.stop
  }
  upload.errback {
    warn "\nUpload failed: #{upload.error}"
    EventMachine.stop
  }
}

# Stop here so that nothing following this line is executed.
exit
### Load the file
# The path of the file to ingest comes from the environment; without it only
# the usage line is printed.
file = ENV['file']
if file.nil?
  puts "ingest.rb file=[file] host=[fedora url] user=[fedora user] pass=[fedora pass]"
  exit
end

# The PID is the file name without directory and extension, in the wgbh: namespace.
cmodel = 'afmodel:GenericFile'
stem = File.basename(file, File.extname(file))
pid = "wgbh:#{stem}"
puts "(Re-)creating Fedora object #{pid}, with File datastream containing #{file}"

# Remove any previous object with this PID; every StandardError is ignored
# (presumably so that a missing object does not abort the run).
begin
  Rubydora.repository.find(pid).delete
rescue
  nil
end

# Create the object fresh and attach the content model.
obj = Rubydora::DigitalObject.create(pid)
obj.models << "info:fedora/#{cmodel}"
obj.save

#ds = obj['content']
#ds.content = open(file)
#ds.save

# Rights metadata document stored in the 'rightsMetadata' datastream.
rights_xml = <<EOF
<rightsMetadata xmlns="http://hydra-collab.stanford.edu/schemas/rightsMetadata/v1" version="0.1">
<copyright>
<human></human>
<machine>
<uvalicense>no</uvalicense>
</machine>
</copyright>
<access type="discover">
<human></human>
<machine></machine>
</access>
<access type="read">
<human></human>
<machine></machine>
</access>
<access type="edit">
<human></human>
<machine>
<person>chris_beer@wgbh.org</person>
</machine>
</access>
<embargo>
<human></human>
<machine></machine>
</embargo>
</rightsMetadata>
EOF

ds = obj['rightsMetadata']
ds.content = rights_xml
ds.save
obj.save
# WHEEE!
# Ingest script: (re-)creates a Fedora object named after the input file and
# writes its rightsMetadata datastream. The parameters for the 'content'
# datastream are captured in ds_params for the statements that follow.
require 'rubygems'
require 'rubydora'
require 'net/http'
require 'uri'
require 'loggable'
require 'net/http/post/multipart'

# Repository connection settings, all taken from the environment.
fedora_settings = {
  :url => ENV['host'],
  :user => ENV['user'],
  :password => ENV['pass'],
  :timeout => 1200
}
Rubydora.repository = Rubydora.connect(fedora_settings)

### Load the file
file = ENV['file']
if file.nil?
  puts "ingest.rb file=[file] host=[fedora url] user=[fedora user] pass=[fedora pass]"
  exit
end

# The PID is the file name without directory and extension, in the wgbh: namespace.
cmodel = 'afmodel:GenericFile'
stem = File.basename(file, File.extname(file))
pid = "wgbh:#{stem}"
puts "(Re-)creating Fedora object #{pid}, with File datastream containing #{file}"

# Remove any previous object with this PID; every StandardError is ignored
# (presumably so that a missing object does not abort the run).
begin
  Rubydora.repository.find(pid).delete
rescue
  nil
end

obj = Rubydora::DigitalObject.create(pid)
obj.models << "info:fedora/#{cmodel}"
obj.save

# Rubydora (read: rest-client) doesn't do streaming requests.. see below for the action
# Only the datastream's API parameters are collected here; to_api_params is
# not public, hence the send.
ds_params = obj['content'].send(:to_api_params)
#ds.content = open(file)
#ds.save

# Rights metadata document stored in the 'rightsMetadata' datastream.
rights_xml = <<EOF
<rightsMetadata xmlns="http://hydra-collab.stanford.edu/schemas/rightsMetadata/v1" version="0.1">
<copyright>
<human></human>
<machine>
<uvalicense>no</uvalicense>
</machine>
</copyright>
<access type="discover">
<human></human>
<machine></machine>
</access>
<access type="read">
<human></human>
<machine></machine>
</access>
<access type="edit">
<human></human>
<machine>
<person>chris_beer@wgbh.org</person>
</machine>
</access>
<embargo>
<human></human>
<machine></machine>
</embargo>
</rightsMetadata>
EOF

ds = obj['rightsMetadata']
ds.content = rights_xml
ds.save
obj.save
# Upload the content!
#
# Sends ENV['file'] to the object's 'content' datastream as a multipart POST,
# authenticated with ENV['user'] / ENV['pass'], and reports the HTTP status.
url = URI.parse(ENV['host'])

# Build the request path once so the log line and the request cannot diverge.
upload_path = File.join(url.path, Rubydora.repository.datastream_url(obj.pid, 'content', ds_params))

print "\n"
print "Uploading #{ENV['file']} to #{upload_path}\n"

# Timeouts must be configured before the connection is opened: open_timeout is
# only consulted while connecting, so setting it inside a Net::HTTP.start
# block (after the connect) has no effect.
http = Net::HTTP.new(url.host, url.port)
http.open_timeout = 5000000
http.read_timeout = 5000000

# Block-form File.open closes the handle even if the request raises; binary
# mode keeps the uploaded bytes identical to the file on disk.
File.open(ENV['file'], 'rb') do |io|
  req = Net::HTTP::Post::Multipart.new upload_path, "file" => UploadIO.new(io, 'application/octet-stream', 'file.bin')
  req.basic_auth ENV['user'], ENV['pass']

  response = http.start { |conn| conn.request(req) }

  # Surface the outcome instead of silently discarding the response.
  if response.is_a?(Net::HTTPSuccess)
    puts "Upload finished: #{response.code} #{response.message}"
  else
    warn "Upload failed: #{response.code} #{response.message}"
  end
end
# DOESN'T WORK FOR LARGE FILES
# /Volumes/Scratch/rubydora/lib/rubydora/datastream.rb:121:in `read': Invalid argument - /Volumes/MLAMellonDigLib2/Vietnam Transfers October 2011/HDS/barcode334930_1.mov (Errno::EINVAL)
# Ingest script: (re-)creates a Fedora object named after the input file,
# stores the file in its 'content' datastream through Rubydora, and writes the
# rightsMetadata datastream.
#
# Environment variables read: host, user, pass, file.
require 'rubygems'
require 'rubydora'
require 'loggable'
Rubydora.repository = Rubydora.connect(:url => ENV['host'], :user => ENV['user'], :password => ENV['pass'], :timeout => 1200)
### Load the file
file = ENV['file']
unless file
  puts "ingest.rb file=[file] host=[fedora url] user=[fedora user] pass=[fedora pass]"
  exit
end
# The PID is the file name without directory and extension, in the wgbh: namespace.
cmodel = 'afmodel:GenericFile'
pid = "wgbh:#{File.basename(file, File.extname(file))}"
puts "(Re-)creating Fedora object #{pid}, with File datastream containing #{file}"
# Remove any previous object with this PID; every error is ignored
# (presumably so that a missing object does not abort the run).
Rubydora.repository.find(pid).delete rescue nil
obj = Rubydora::DigitalObject.create(pid)
obj.models << "info:fedora/#{cmodel}"
obj.save
# File.open instead of Kernel#open: Kernel#open would run a shell command if
# the supplied value started with "|". The block form closes the handle once
# the datastream has been saved, and binary mode keeps the bytes untouched.
File.open(file, 'rb') do |io|
  ds = obj['content']
  ds.content = io
  ds.save
end
ds = obj['rightsMetadata']
ds.content = <<EOF
<rightsMetadata xmlns="http://hydra-collab.stanford.edu/schemas/rightsMetadata/v1" version="0.1">
<copyright>
<human></human>
<machine>
<uvalicense>no</uvalicense>
</machine>
</copyright>
<access type="discover">
<human></human>
<machine></machine>
</access>
<access type="read">
<human></human>
<machine></machine>
</access>
<access type="edit">
<human></human>
<machine>
<person>chris_beer@wgbh.org</person>
</machine>
</access>
<embargo>
<human></human>
<machine></machine>
</embargo>
</rightsMetadata>
EOF
ds.save
obj.save
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment