Upload an archive to AWS Glacier, automatically splitting the file into parts if necessary
#!/usr/bin/env ruby
# Inspired by https://gist.github.com/flah00/959251f74ba2aa797b24
# Usage: ruby glacier_uploader.rb VAULT /PATH/TO/FILE [NUM_PARTS] [RETRIES]
# Glacier accepts single-request uploads up to 4 GB, but AWS recommends
# multipart uploads for archives larger than 100 MB; this script switches
# to multipart above 1 GiB.
# Part sizes must be a power of two between 1 MiB and 4 GiB, and a
# multipart upload may contain at most 10,000 parts.
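# Example (hypothetical vault and file names):
#   ruby glacier_uploader.rb my-vault /backups/2018-07.tar.gz 100 5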
require 'aws-sdk'
require 'date'
require 'logger'

Aws.config[:logger] = Logger.new($stdout)
@vault_name  = ARGV[0] || raise("Missing vault name")
@file_path   = ARGV[1] || raise("Missing file path")
@nparts      = ARGV[2].nil? ? 9_999 : ARGV[2].to_i
@max_retries = ARGV[3].nil? ? 3 : ARGV[3].to_i

# Two clients: a quiet one for calls whose request bodies (file chunks)
# would swamp the log, and a logging one for everything else
@glacier_n = Aws::Glacier::Client.new(logger: nil)
@glacier   = Aws::Glacier::Client.new

# Binary mode keeps chunk boundaries byte-accurate on every platform
@file = File.open(@file_path, "rb")
# Tell Glacier to discard any parts uploaded so far, then bail out
def abort_upload(e)
  if @upload_id
    @glacier.abort_multipart_upload(
      vault_name: @vault_name,
      upload_id: @upload_id,
    )
  end
  $stderr.puts "ERROR #{e.class}: #{e}"
  exit(1)
end
size = File.size(@file_path)
if size > 1024 ** 3
  # Aim for @nparts parts, rounding the part size up to the next power of
  # two; Glacier rejects parts smaller than 1 MiB, so clamp there
  s = size / @nparts
  @part_size = [2 ** Math.log2(s).ceil, 1024 ** 2].max
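  # Worked example: a 10 GiB file with the default 9,999 parts gives
  # s ~= 1,073,849 bytes; log2(s).ceil == 21, so the part size becomes
  # 2**21 = 2 MiB and the upload runs in 5,120 parts.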
  resp = @glacier.initiate_multipart_upload(
    vault_name: @vault_name,
    part_size: @part_size,
    archive_description: @file.path,
  )
  @upload_id = resp.data.upload_id
  # Keep a rolling tree hash of the entire file; it is required to
  # complete the multipart upload at the end
  tree_hash = Aws::TreeHash.new
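  # A Glacier tree hash is built bottom-up: SHA-256 over each 1 MiB chunk
  # of data, then adjacent digests are concatenated and re-hashed pairwise
  # until a single root remains. Aws::TreeHash collects the leaf hashes
  # and derives that root in #digest.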
  # Upload the file in chunks and extract from each response the tree hash
  # for that chunk. This eliminates the need to compute the total tree
  # hash of the object in a second pass.
  offset  = 0
  retries = 0
  until @file.eof?
    chunk = @file.read(@part_size)
    begin
      resp = @glacier_n.upload_multipart_part(
        vault_name: @vault_name,
        upload_id: @upload_id,
        body: chunk,
        # Content-Range-style header; "*" because the total size is only
        # reported later, in complete_multipart_upload
        range: "bytes #{offset}-#{offset + chunk.bytesize - 1}/*"
      )
    rescue Aws::Glacier::Errors::ServiceError => e
      if retries > @max_retries
        abort_upload(e)
      else
        puts "#{Time.now} ERROR offset #{offset} #{e.class}: #{e}"
        retries += 1
        sleep 60
        retry
      end
    end
    # The SDK's Glacier checksum plugin stashes each part's tree hash in
    # the response context; fold its leaf hashes into the rolling total
    tree_hash.hashes.concat(resp.context[:tree_hash].hashes)
    puts "INFO Uploaded offset #{offset}"
    offset += chunk.bytesize
    retries = 0
  end
  # Complete the multipart upload; Glacier verifies the checksum against
  # the tree hash of the assembled archive
  begin
    resp = @glacier.complete_multipart_upload(
      vault_name: @vault_name,
      upload_id: @upload_id,
      archive_size: File.size(@file.path),
      checksum: tree_hash.digest
    )
  rescue Aws::Glacier::Errors::ServiceError => e
    abort_upload(e)
  end
else
  # Small enough for a single request; the SDK computes the checksum itself
  begin
    resp = @glacier_n.upload_archive(
      vault_name: @vault_name,
      archive_description: @file.path,
      body: @file
    )
  rescue Aws::Glacier::Errors::ServiceError => e
    abort_upload(e)
  end
end
puts "ARCHIVE_ID: #{resp.archive_id}"
puts "SIZE: #{size}"
puts "DATE: #{Date.today}"