pat/README.md

## README.md

      
    Raw
  

              README.md
            
          
A simple Ruby class for uploading really large files to S3 via @envato's awsraw gem.
Until the related pull request is merged into awsraw, you'll need to use my fork of the gem.
Usage:

```ruby
MultipartUploader.new(connection, path, key).upload
```
Build a Faraday connection with AWSRaw credentials as shown below; you can then re-use it in the uploader.
The `path` argument is the full path of the file on the computer this code runs on, and the `key` argument is the path where the file should be located within the bucket. For example:

```ruby
uploader = MultipartUploader.new connection,
  "/Users/pat/Code/test.bigfile", "testing/test.bigfile"
```
Don't forget to set up your AWS credentials and bucket/subdomain details accordingly!

  
## multipart_uploader.rb
require 'awsraw'
require 'nokogiri'

# Placeholder settings: replace every '...' with real values before running.
ACCESS_KEY_ID     = '...'
SECRET_ACCESS_KEY = '...'
BUCKET            = '...'
REGION            = 's3-ap-southeast-2'
# Bucket name and regional subdomain, joined to form the host prefix below.
PREFIX            = "#{BUCKET}.#{REGION}"

credentials = AWSRaw::Credentials.new(
  access_key_id:     ACCESS_KEY_ID,
  secret_access_key: SECRET_ACCESS_KEY
)

# Connection aimed at "<bucket>.<region>.amazonaws.com". The stack registers
# AWSRaw's S3 middleware with the credentials (presumably to sign requests --
# confirm against awsraw), logs responses, and uses Faraday's default adapter.
endpoint   = "http://#{PREFIX}.amazonaws.com"
connection = Faraday.new(endpoint) do |builder|
  builder.use      AWSRaw::S3::FaradayMiddleware, credentials
  builder.response :logger
  builder.adapter  Faraday.default_adapter
end

# Uploads a local file to S3 through the multipart upload REST calls: one
# request to initiate the upload, one PUT per chunk of the file, and a final
# request listing the uploaded parts. All requests are issued through the
# connection object supplied by the caller.
#
# Example:
#   MultipartUploader.new(connection, path, key).upload
class MultipartUploader
  # Raised when a request gets a non-2xx response, or when a response lacks
  # the data needed to continue (the upload id or a part's ETag).
  class UploadError < StandardError; end

  MULTIPART_CHUNK_SIZE = 1024 * 1024 * 100 # 100MB
  CONTENT_TYPE         = 'binary/octet-stream'.freeze

  # @param connection [#post, #put, #delete] connection whose verb methods
  #   yield a request (responding to #url, #headers, #body=) and return a
  #   response (responding to #status, #headers, #body)
  # @param path [String] path of the local file to upload
  # @param key [String] request path of the object within the bucket
  # @param chunk_size [Integer] number of bytes sent per part
  # @raise [ArgumentError] if chunk_size is not a positive Integer
  def initialize(connection, path, key, chunk_size = MULTIPART_CHUNK_SIZE)
    # A zero-length read never reaches end of file, so reject it up front.
    if !chunk_size.is_a?(Integer) || chunk_size <= 0
      raise ArgumentError, 'chunk_size must be a positive Integer'
    end

    @connection = connection
    @path       = path
    @key        = key
    @chunk_size = chunk_size
    @tags       = []
  end

  # Runs the whole upload: initiate, send every part, then complete.
  #
  # If initiating or sending a part fails, an abort request is sent (when an
  # upload id was obtained) and the error is re-raised. A failure of the
  # completion request itself is raised without aborting.
  #
  # @return [Object] the response to the completion request
  # @raise [UploadError] when any request is answered with a non-2xx status
  def upload
    initiate
    upload_parts
  rescue
    abort
    raise
  else
    complete
  end

  private

  attr_reader :connection, :path, :key, :upload_id, :tags, :chunk_size

  # Asks the server to discard the upload. Does nothing when no upload id was
  # obtained. Note: within this class the name shadows Kernel#abort.
  def abort
    return if upload_id.nil?

    connection.delete do |request|
      request.url "#{key}?uploadId=#{upload_id}"
      request.headers['Content-Type'] = CONTENT_TYPE
    end
  end

  # Sends the list of uploaded parts to finish the upload.
  def complete
    body = completion_body

    response = connection.post do |request|
      request.url "#{key}?uploadId=#{upload_id}"
      # Content-Length counts bytes, not characters.
      request.headers['Content-Length'] = body.bytesize.to_s
      request.headers['Content-Type']   = CONTENT_TYPE
      request.body = body
    end

    ensure_success(response, 'Completing the upload')
  end

  # Builds the XML document listing every part number with its ETag, in the
  # order the parts were uploaded.
  def completion_body
    parts = tags.each_with_index.map do |tag, index|
      "<Part><PartNumber>#{index + 1}</PartNumber><ETag>#{tag}</ETag></Part>"
    end

    "<CompleteMultipartUpload>#{parts.join}</CompleteMultipartUpload>"
  end

  # Starts the upload and stores the UploadId found in the response body.
  def initiate
    response = connection.post do |request|
      request.url "#{key}?uploads"
      request.headers['Content-Type'] = CONTENT_TYPE
    end
    ensure_success(response, 'Initiating the upload')

    node = Nokogiri(response.body).css('UploadId').first
    raise UploadError, "No UploadId in the initiate response for #{key}" if node.nil?

    @upload_id = node.text
  end

  # Sends one chunk as the given part number and returns the checked response.
  def upload_part(part, number)
    response = connection.put do |request|
      request.url "#{key}?partNumber=#{number}&uploadId=#{upload_id}"
      request.headers['Content-Type']   = CONTENT_TYPE
      request.headers['Content-Length'] = part.bytesize.to_s
      request.body = part
    end

    ensure_success(response, "Uploading part #{number}")
  end

  # Reads the file chunk by chunk, uploads each chunk as the next part and
  # records its ETag (surrounding double quotes removed) in tags.
  def upload_parts
    # Binary mode avoids newline translation; the block form closes the file
    # even when a part upload raises.
    File.open(path, 'rb') do |file|
      while (chunk = file.read(chunk_size))
        number = tags.length + 1
        etag   = upload_part(chunk, number).headers['ETag']
        raise UploadError, "No ETag in the response to part #{number} of #{key}" if etag.nil?

        tags << etag.delete('"')
      end
    end
  end

  # Returns the response when its status is 2xx, otherwise raises UploadError.
  def ensure_success(response, action)
    return response if (200..299).cover?(response.status)

    raise UploadError, "#{action} failed for #{key} (HTTP #{response.status})"
  end
end
	require 'awsraw'
	require 'nokogiri'

	# Placeholder settings: replace every '...' with real values before running.
	ACCESS_KEY_ID = '...'
	SECRET_ACCESS_KEY = '...'
	BUCKET = '...'
	REGION = 's3-ap-southeast-2'
	# Bucket name and regional subdomain, joined to form the host prefix below.
	PREFIX = "#{BUCKET}.#{REGION}"

	credentials = AWSRaw::Credentials.new(
	  :access_key_id => ACCESS_KEY_ID,
	  :secret_access_key => SECRET_ACCESS_KEY
	)

	# Connection aimed at "<bucket>.<region>.amazonaws.com". The stack registers
	# AWSRaw's S3 middleware with the credentials (presumably to sign requests --
	# confirm against awsraw), logs responses, and uses Faraday's default adapter.
	# The block parameter is written with plain pipes; the escaped form
	# `\|faraday\|` is not valid Ruby.
	connection = Faraday.new("http://#{PREFIX}.amazonaws.com") do |faraday|
	  faraday.use AWSRaw::S3::FaradayMiddleware, credentials
	  faraday.response :logger
	  faraday.adapter Faraday.default_adapter
	end

	# Uploads a local file to S3 through the multipart upload REST calls: one
	# request to initiate the upload, one PUT per chunk of the file, and a final
	# request listing the uploaded parts. All requests are issued through the
	# connection object supplied by the caller.
	#
	# Example:
	#   MultipartUploader.new(connection, path, key).upload
	class MultipartUploader
	  # Raised when a request gets a non-2xx response, or when a response lacks
	  # the data needed to continue (the upload id or a part's ETag).
	  class UploadError < StandardError; end

	  MULTIPART_CHUNK_SIZE = 1024 * 1024 * 100 # 100MB
	  CONTENT_TYPE         = 'binary/octet-stream'.freeze

	  # @param connection [#post, #put, #delete] connection whose verb methods
	  #   yield a request (responding to #url, #headers, #body=) and return a
	  #   response (responding to #status, #headers, #body)
	  # @param path [String] path of the local file to upload
	  # @param key [String] request path of the object within the bucket
	  # @param chunk_size [Integer] number of bytes sent per part
	  # @raise [ArgumentError] if chunk_size is not a positive Integer
	  def initialize(connection, path, key, chunk_size = MULTIPART_CHUNK_SIZE)
	    # A zero-length read never reaches end of file, so reject it up front.
	    if !chunk_size.is_a?(Integer) || chunk_size <= 0
	      raise ArgumentError, 'chunk_size must be a positive Integer'
	    end

	    @connection = connection
	    @path       = path
	    @key        = key
	    @chunk_size = chunk_size
	    @tags       = []
	  end

	  # Runs the whole upload: initiate, send every part, then complete.
	  #
	  # If initiating or sending a part fails, an abort request is sent (when an
	  # upload id was obtained) and the error is re-raised. A failure of the
	  # completion request itself is raised without aborting.
	  #
	  # @return [Object] the response to the completion request
	  # @raise [UploadError] when any request is answered with a non-2xx status
	  def upload
	    initiate
	    upload_parts
	  rescue
	    abort
	    raise
	  else
	    complete
	  end

	  private

	  attr_reader :connection, :path, :key, :upload_id, :tags, :chunk_size

	  # Asks the server to discard the upload. Does nothing when no upload id was
	  # obtained. Note: within this class the name shadows Kernel#abort.
	  def abort
	    return if upload_id.nil?

	    connection.delete do |request|
	      request.url "#{key}?uploadId=#{upload_id}"
	      request.headers['Content-Type'] = CONTENT_TYPE
	    end
	  end

	  # Sends the list of uploaded parts to finish the upload.
	  def complete
	    body = completion_body

	    response = connection.post do |request|
	      request.url "#{key}?uploadId=#{upload_id}"
	      # Content-Length counts bytes, not characters.
	      request.headers['Content-Length'] = body.bytesize.to_s
	      request.headers['Content-Type']   = CONTENT_TYPE
	      request.body = body
	    end

	    ensure_success(response, 'Completing the upload')
	  end

	  # Builds the XML document listing every part number with its ETag, in the
	  # order the parts were uploaded.
	  def completion_body
	    parts = tags.each_with_index.map do |tag, index|
	      "<Part><PartNumber>#{index + 1}</PartNumber><ETag>#{tag}</ETag></Part>"
	    end

	    "<CompleteMultipartUpload>#{parts.join}</CompleteMultipartUpload>"
	  end

	  # Starts the upload and stores the UploadId found in the response body.
	  def initiate
	    response = connection.post do |request|
	      request.url "#{key}?uploads"
	      request.headers['Content-Type'] = CONTENT_TYPE
	    end
	    ensure_success(response, 'Initiating the upload')

	    node = Nokogiri(response.body).css('UploadId').first
	    raise UploadError, "No UploadId in the initiate response for #{key}" if node.nil?

	    @upload_id = node.text
	  end

	  # Sends one chunk as the given part number and returns the checked response.
	  def upload_part(part, number)
	    response = connection.put do |request|
	      request.url "#{key}?partNumber=#{number}&uploadId=#{upload_id}"
	      request.headers['Content-Type']   = CONTENT_TYPE
	      request.headers['Content-Length'] = part.bytesize.to_s
	      request.body = part
	    end

	    ensure_success(response, "Uploading part #{number}")
	  end

	  # Reads the file chunk by chunk, uploads each chunk as the next part and
	  # records its ETag (surrounding double quotes removed) in tags.
	  def upload_parts
	    # Binary mode avoids newline translation; the block form closes the file
	    # even when a part upload raises.
	    File.open(path, 'rb') do |file|
	      while (chunk = file.read(chunk_size))
	        number = tags.length + 1
	        etag   = upload_part(chunk, number).headers['ETag']
	        raise UploadError, "No ETag in the response to part #{number} of #{key}" if etag.nil?

	        tags << etag.delete('"')
	      end
	    end
	  end

	  # Returns the response when its status is 2xx, otherwise raises UploadError.
	  def ensure_success(response, action)
	    return response if (200..299).cover?(response.status)

	    raise UploadError, "#{action} failed for #{key} (HTTP #{response.status})"
	  end
	end