Created
October 10, 2017 08:03
-
-
Save brainstorm/595dff1e5c0d36dda8b449b222952982 to your computer and use it in GitHub Desktop.
Glacier tests dirty script from icepick: https://github.com/leekew/icepick
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# "IcePick", originally "gardnert/aws-glacier-multipart-upload".
#
# Uploads a single file to an Amazon Glacier vault as a multipart archive:
#   1. split the file into byteSize-sized parts (in a fresh work directory),
#   2. initiate a multipart upload and remember its upload id,
#   3. upload all parts with GNU parallel,
#   4. complete the upload using the file's SHA-256 tree hash.
#
# Usage:    ./icepick.sh <filename>
# Requires: GNU split, aws CLI, jq, GNU parallel, and java with the
#           TreeHashExample class on the classpath.
# Output:   the JSON response of complete-multipart-upload is appended to
#           result.json in the current directory.

set -euo pipefail

# defaults
#byteSize=4294967296 # 4GB, the largest part size Glacier accepts
#byteSize=104857600  # 100MB: rejected by Glacier, not 1MB times a power of two
byteSize=134217728   # 128MB, power of two, sweet spot to reduce uploading timeouts
account="-"          # "-" lets Glacier use the account of the active credentials
vaultName="testvault"
descript='testvault'

# Path of the temporary command list; set by main, removed by cleanup.
commandsFile=""

# Print an error message to stderr and abort.
die() {
  printf 'icepick: %s\n' "$*" >&2
  exit 1
}

# Print the command-line synopsis to stderr.
usage() {
  printf 'Usage: %s <filename>\n' "${0##*/}" >&2
}

# Remove the temporary command list on every exit path.
cleanup() {
  if [[ -n "$commandsFile" ]]; then
    rm -f -- "$commandsFile"
  fi
}
trap cleanup EXIT

# Print the size of file $1 in bytes.
# Uses wc instead of parsing `ls -l`, whose column padding varies.
file_size() {
  local bytes
  bytes=$(wc -c <"$1") || return
  # Arithmetic expansion strips the leading blanks some wc builds emit.
  printf '%s\n' "$((bytes))"
}

# Split, upload and complete the archive given as $1.
main() {
  if (( $# < 1 )); then
    usage
    exit 2
  fi
  local archive=$1
  [[ -f "$archive" && -r "$archive" ]] || die "cannot read file: $archive"

  local tool
  for tool in split aws jq parallel java; do
    command -v "$tool" >/dev/null || die "required tool not found: $tool"
  done

  # get total size in bytes of the archive
  local archivesize
  archivesize=$(file_size "$archive")
  (( archivesize > 0 )) || die "refusing to upload an empty file: $archive"

  # Compute the tree hash before anything is sent, so that a missing or
  # broken TreeHashExample fails early instead of after the whole upload.
  local checksum
  checksum=$(java TreeHashExample "$archive" | cut -d ' ' -f 5)
  [[ "$checksum" =~ ^[0-9a-fA-F]{64}$ ]] \
    || die "TreeHashExample did not print a SHA-256 tree hash (got: '$checksum')"

  # Create the file parts in a fresh per-run directory so that leftover
  # part files from an earlier run can never end up in this upload.
  local workDir
  workDir=$(mktemp -d icepick.XXXXXX) || die "cannot create work directory"
  commandsFile="$workDir/commands.txt"
  split --bytes="$byteSize" --verbose -- "$archive" "$workDir/part"

  # Collect the parts; the glob expands in sorted order, which is the order
  # split wrote them in.
  local -a parts=()
  shopt -s nullglob
  parts=("$workDir"/part*)
  shopt -u nullglob
  (( ${#parts[@]} > 0 )) || die "split produced no part files"
  printf 'Total parts to upload: %d\n' "${#parts[@]}"

  # initiate multipart upload connection to glacier
  local uploadId
  uploadId=$(aws glacier initiate-multipart-upload \
      --account-id "$account" --part-size "$byteSize" \
      --vault-name "$vaultName" --archive-description "$descript" \
    | jq -r .uploadId)
  [[ -n "$uploadId" && "$uploadId" != "null" ]] \
    || die "initiate-multipart-upload returned no uploadId"
  echo "---------------------------------------"
  echo "Multipart init upload id is: $uploadId"

  # create upload commands to be run in parallel and store them in the
  # command list, one complete shell command per line
  : >"$commandsFile"
  local i=0 part partsize byteStart byteEnd
  for part in "${parts[@]}"; do
    partsize=$(file_size "$part")
    echo "filesize $partsize"
    byteStart=$(( i * byteSize ))
    # Content-Range is inclusive on both ends, so the last byte of a part is
    # start + size - 1; this also covers the shorter final part.
    byteEnd=$(( byteStart + partsize - 1 ))
    # %q shell-quotes the values, so unusual file or vault names survive the
    # second round of shell parsing done by parallel.
    printf "aws glacier upload-multipart-part --body %q --range 'bytes %d-%d/*' --account-id %q --vault-name %q --upload-id %q\n" \
      "$part" "$byteStart" "$byteEnd" "$account" "$vaultName" "$uploadId" \
      >>"$commandsFile"
    i=$(( i + 1 ))
  done

  # run upload commands in parallel
  # --load 100% only starts new jobs while the system load is below 100%
  # -a FILE runs every line of that file as a job, in potentially random order
  parallel --load 100% -a "$commandsFile" \
    || die "one or more part uploads failed; upload id $uploadId is still open"

  echo "List Active Multipart Uploads:"
  echo "Verify that a connection is open:"
  aws glacier list-multipart-uploads --account-id "$account" --vault-name "$vaultName"

  # end the multipart upload
  local result
  result=$(aws glacier complete-multipart-upload \
    --account-id "$account" --vault-name "$vaultName" \
    --upload-id "$uploadId" --archive-size "$archivesize" \
    --checksum "$checksum")

  # store the json response from amazon for record keeping; quoted so the
  # response is written exactly as received
  printf '%s\n' "$result" >>result.json

  # list open multipart connections
  echo "------------------------------"
  echo "List Active Multipart Uploads:"
  echo "Verify that the connection is closed:"
  aws glacier list-multipart-uploads --account-id "$account" --vault-name "$vaultName"

  echo "--------------"
  echo "Deleting temporary commands.txt file"
  # (the file itself is removed by the EXIT trap)

  # The part files are deliberately kept, as before; delete the directory
  # by hand once the archive has been verified.
  echo "Part files were left in: $workDir"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment