@brainstorm
Created October 10, 2017 08:03
A quick-and-dirty Glacier multipart upload test script, taken from icepick: https://github.com/leekew/icepick
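The script shells out to several external tools (the AWS CLI, jq, GNU parallel, split, and a compiled Java TreeHashExample class). A quick pre-flight check along these lines can save a failed half-upload; this check is illustrative and not part of the original gist:

```shell
# Illustrative pre-flight check: report any missing tool the script calls.
# The tool names come from the script below; the check itself is an addition.
for tool in aws jq parallel split java; do
  command -v "$tool" >/dev/null 2>&1 || echo "missing: $tool"
done
```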
#!/bin/bash
# "IcePick", originally "gardnert/aws-glacier-multipart-upload"
# Usage: ./icepick.sh <filename>
# Part-size defaults:
#byteSize=4294967296 # 4 GiB, the Glacier maximum part size
#byteSize=104857600  # 100 MiB fails: Glacier only accepts part sizes of 1 MiB times a power of two
byteSize=134217728 # 128 MiB: a valid power-of-two size, and a sweet spot for avoiding upload timeouts
account="-"
vaultName="testvault"
# archive description; quoted below so multi-word descriptions also work
descript='testvault'
# create the file parts
split --bytes="$byteSize" --verbose "$1" part
# count the number of files that begin with "part"
fileCount=$(ls -1 | grep -c "^part")
echo "Total parts to upload: $fileCount"
# get the list of part files to upload; edit this if you chose a different prefix in the split command
files=$(ls | grep "^part")
# initiate the multipart upload to Glacier
uploadId=$(aws glacier initiate-multipart-upload --account-id "$account" --part-size "$byteSize" --vault-name "$vaultName" --archive-description "$descript" | jq -r .uploadId)
echo "---------------------------------------"
echo "Multipart init upload id is: " $uploadId
# create temp file to store commands
touch commands.txt
# get total size in bytes of the archive (wc -c is more robust than parsing ls -l output)
archivesize=$(wc -c < "$1" | tr -d ' ')
# create upload commands to be run in parallel and store in commands.txt
i=0
for f in $files
do
    filesize=$(wc -c < "$f" | tr -d ' ')
    echo "filesize $filesize"
    byteStart=$((i*byteSize))
    byteEnd=$((i*byteSize+byteSize-1))
    # the last part is usually smaller than byteSize, so recompute its end byte
    # (the range is inclusive, hence the -1)
    if [ "$filesize" -lt "$byteSize" ]; then
        byteEnd=$((byteStart+filesize-1))
    fi
    echo aws glacier upload-multipart-part --body $f --range "'"'bytes '"$byteStart"'-'"$byteEnd"'/*'"'" --account-id $account --vault-name $vaultName --upload-id $uploadId >> commands.txt
    i=$((i+1))
done
# run upload commands in parallel
# --load 100% only hands out new jobs while the machine is less than 100% loaded
# -a commands.txt runs every line of that file in parallel, in potentially arbitrary order
parallel --load 100% -a commands.txt
echo "List Active Multipart Uploads:"
echo "Verify that a connection is open:"
aws glacier list-multipart-uploads --account-id $account --vault-name $vaultName
# compute the SHA-256 tree hash (TreeHashExample is the Java sample from the AWS Glacier docs)
checksum=$(java TreeHashExample "$1" | cut -d ' ' -f 5)
#read
# complete the multipart upload
result=$(aws glacier complete-multipart-upload --account-id "$account" --vault-name "$vaultName" --upload-id "$uploadId" --archive-size "$archivesize" --checksum "$checksum")
# store the json response from amazon for record keeping
echo "$result" > result.json
# list open multipart connections
echo "------------------------------"
echo "List Active Multipart Uploads:"
echo "Verify that the connection is closed:"
aws glacier list-multipart-uploads --account-id $account --vault-name $vaultName
#echo "-------------"
#echo "Contents of commands.txt"
#cat commands.txt
echo "--------------"
echo "Deleting temporary commands.txt file"
rm commands.txt
#remove the part files
#echo "--------------"
#echo "Deleting parts files"
#for f in $files; do
# rm $f
#done
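The checksum step above requires a compiled Java TreeHashExample class. For reference, the Glacier SHA-256 tree hash is simple enough to sketch in Python (this is an additional illustration of the documented algorithm, not part of the original script): hash the file in 1 MiB chunks, then repeatedly hash pairs of binary digests until a single root digest remains.

```python
import hashlib

def glacier_tree_hash(path, chunk_size=1024 * 1024):
    """Glacier-style SHA-256 tree hash: hash each 1 MiB chunk of the file,
    then pairwise-hash the binary digests up to a single root digest."""
    hashes = []
    with open(path, "rb") as f:
        while True:
            chunk = f.read(chunk_size)
            if not chunk:
                break
            hashes.append(hashlib.sha256(chunk).digest())
    if not hashes:  # empty file: hash of the empty string
        hashes = [hashlib.sha256(b"").digest()]
    while len(hashes) > 1:
        paired = []
        for i in range(0, len(hashes), 2):
            if i + 1 < len(hashes):
                # concatenate the two binary digests and hash the result
                paired.append(hashlib.sha256(hashes[i] + hashes[i + 1]).digest())
            else:
                # an odd leftover digest is promoted unchanged to the next level
                paired.append(hashes[i])
        hashes = paired
    return hashes[0].hex()
```

For a file of at most 1 MiB the tree hash equals the plain SHA-256 of the file, which makes the function easy to sanity-check.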