Skip to content

Instantly share code, notes, and snippets.

@maerlyn
Created January 21, 2018 14:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maerlyn/24701f1813fd5fd4a641c32467912bf8 to your computer and use it in GitHub Desktop.
Save maerlyn/24701f1813fd5fd4a641c32467912bf8 to your computer and use it in GitHub Desktop.
glacier multipart upload
pv <filename> | split --bytes=134217728 --verbose -a 5 - part
#!/bin/bash
# Upload an archive to Amazon Glacier as a multipart upload.
#
# Usage: $0 <archive-file>
#
# The archive must already have been split into chunks of $byteSize bytes whose
# names start with "part" and which live in the current directory, e.g.:
#   pv <archive-file> | split --bytes=134217728 --verbose -a 5 - part
#
# dependencies, jq and parallel:
# sudo dnf install jq
# sudo dnf install parallel
# sudo pip install awscli
# The compiled TreeHashExample class must also be runnable via "java TreeHashExample".

# The original archive is needed for its total size and for the tree hash.
if [ $# -lt 1 ] || [ ! -f "$1" ]; then
    echo "Usage: $0 <archive-file>" >&2
    exit 1
fi

byteSize=134217728
vaultName='digicam'

# get the list of part files to upload. Edit this if you chose a different prefix in the split command.
# A glob is used instead of parsing the output of ls; it yields the same sorted list.
shopt -s nullglob
parts=(part*)
shopt -u nullglob

# count the number of files that begin with "part"
fileCount=${#parts[@]}
echo "Total parts to upload: " $fileCount
if [ "$fileCount" -eq 0 ]; then
    echo "No files starting with \"part\" found in the current directory" >&2
    exit 1
fi

# get total size in bytes of the archive
# (wc -c is used because the column layout of "ls -l" is not stable)
archivesize=$(wc -c < "$1" | tr -d '[:space:]')

# initiate multipart upload connection to glacier
init=$(aws glacier initiate-multipart-upload --account-id - --part-size $byteSize --vault-name $vaultName --archive-description '2015-06-04-06-Csapatepito.tar.bz2')
echo "---------------------------------------"

# jq -r pulls out the json element titled uploadId without the surrounding quotes
uploadId=$(printf '%s' "$init" | jq -r '.uploadId')
if [ -z "$uploadId" ] || [ "$uploadId" = "null" ]; then
    echo "Could not initiate the multipart upload; response was: $init" >&2
    exit 1
fi

# create temp file to store commands
> commands.txt

# create upload commands to be run by parallel and store in commands.txt
i=0
for f in "${parts[@]}"
do
    filesize=$(wc -c < "$f")
    echo 'filesize ' $filesize
    byteStart=$((i*byteSize))
    byteEnd=$((i*byteSize+byteSize-1))
    # the last part is usually shorter than byteSize: clamp the end of the range
    # to the last valid byte offset of the archive (archivesize-1).
    # -ge (not -gt) is required, otherwise an archive of exactly k*byteSize-1
    # bytes would get a range ending one byte past its end.
    if [ "$byteEnd" -ge "$archivesize" ]; then
        byteEnd=$((archivesize-1))
    fi
    printf "aws glacier upload-multipart-part --body '%s' --range 'bytes %s-%s/%s' --account-id - --vault-name %s --upload-id '%s'\n" \
        "$f" "$byteStart" "$byteEnd" "$archivesize" "$vaultName" "$uploadId" >> commands.txt
    i=$((i+1))
done

# run upload commands with GNU parallel
# -j 1 runs one upload at a time; raise it to upload several parts concurrently
# -a commands.txt runs every line of that file as a command
# --no-notice suppresses citation output to the console
# --bar provides a command line progress bar
if ! parallel -j 1 -a commands.txt --no-notice --bar; then
    echo "At least one part failed to upload; not completing upload $uploadId" >&2
    exit 1
fi

echo "List Active Multipart Uploads:"
echo "Verify that a connection is open:"
aws glacier list-multipart-uploads --account-id - --vault-name $vaultName

# compute the tree hash
# TreeHashExample prints "SHA-256 Tree Hash = <hex>", so the hash is the 5th field
checksum=$(java TreeHashExample "$1" | cut -d ' ' -f 5)
if [ -z "$checksum" ]; then
    echo "Could not compute the tree hash of $1; not completing upload $uploadId" >&2
    exit 1
fi

# end the multipart upload
result=$(aws glacier complete-multipart-upload --account-id - --vault-name $vaultName --upload-id "$uploadId" --archive-size $archivesize --checksum $checksum)

# store the json response from amazon for record keeping
printf '%s\n' "$result" >> result.json

# list open multipart connections
echo "------------------------------"
echo "List Active Multipart Uploads:"
echo "Verify that the connection is closed:"
aws glacier list-multipart-uploads --account-id - --vault-name $vaultName

#echo "-------------"
#echo "Contents of commands.txt"
#cat commands.txt
#echo "--------------"
#echo "Deleting temporary commands.txt file"
#rm commands.txt
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
/**
 * Command-line utility that computes the SHA-256 tree hash of a file, as
 * required by Amazon Glacier uploads: the file is hashed in 1 MB chunks and
 * the chunk hashes are then combined pairwise, level by level, into a single
 * root hash.
 */
public class TreeHashExample {

    /** Size in bytes of the chunks that form the leaves of the hash tree. */
    static final int ONE_MB = 1024 * 1024;

    /** Lower-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Compute the Hex representation of the SHA-256 tree hash for the specified
     * File and print it to standard output as
     * {@code SHA-256 Tree Hash = <hex>}. Exits with status -1 when the argument
     * is missing, the file cannot be read or SHA-256 is unavailable.
     *
     * @param args
     *            args[0]: a file to compute a SHA-256 tree hash for
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Missing required filename argument");
            System.exit(-1);
        }

        File inputFile = new File(args[0]);
        try {
            byte[] treeHash = computeSHA256TreeHash(inputFile);
            System.out.printf("SHA-256 Tree Hash = %s\n", toHex(treeHash));
        } catch (IOException ioe) {
            System.err.format("Exception when reading from file %s: %s\n", inputFile,
                    ioe.getMessage());
            System.exit(-1);
        } catch (NoSuchAlgorithmException nsae) {
            System.err.format("Cannot locate MessageDigest algorithm for SHA-256: %s\n",
                    nsae.getMessage());
            System.exit(-1);
        }
    }

    /**
     * Computes the SHA-256 tree hash for the given file
     *
     * @param inputFile
     *            a File to compute the SHA-256 tree hash for
     * @return a byte[] containing the SHA-256 tree hash
     * @throws IOException
     *             Thrown if there's an issue reading the input file
     * @throws NoSuchAlgorithmException
     *             Thrown if SHA-256 MessageDigest can't be found
     */
    public static byte[] computeSHA256TreeHash(File inputFile) throws IOException,
            NoSuchAlgorithmException {
        byte[][] chunkSHA256Hashes = getChunkSHA256Hashes(inputFile);
        return computeSHA256TreeHash(chunkSHA256Hashes);
    }

    /**
     * Computes a SHA256 checksum for each 1 MB chunk of the input file. This
     * includes the checksum for the last chunk even if it is smaller than 1 MB.
     * An empty file yields a single checksum: the SHA-256 of zero bytes.
     *
     * @param file
     *            A file to compute checksums on
     * @return a byte[][] containing the checksums of each 1 MB chunk
     * @throws IOException
     *             Thrown if there's an IOException when reading the file, or if
     *             the amount of data read does not match the length the file
     *             reported when hashing started
     * @throws NoSuchAlgorithmException
     *             Thrown if SHA-256 MessageDigest can't be found
     */
    public static byte[][] getChunkSHA256Hashes(File file) throws IOException,
            NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("SHA-256");

        long fileLength = file.length();
        long numChunks = fileLength / ONE_MB;
        if (fileLength % ONE_MB > 0) {
            numChunks++;
        }

        if (numChunks == 0) {
            return new byte[][] { md.digest() };
        }

        byte[][] chunkSHA256Hashes = new byte[(int) numChunks][];
        FileInputStream fileStream = null;

        try {
            fileStream = new FileInputStream(file);
            byte[] buff = new byte[ONE_MB];

            int bytesRead;
            int idx = 0;

            // readChunk fills the buffer completely unless the end of the file
            // is reached, so every leaf except the last covers exactly 1 MB even
            // when the underlying stream delivers data in smaller pieces.
            while ((bytesRead = readChunk(fileStream, buff)) > 0) {
                if (idx == chunkSHA256Hashes.length) {
                    throw new IOException("File grew while it was being hashed: " + file);
                }
                md.reset();
                md.update(buff, 0, bytesRead);
                chunkSHA256Hashes[idx++] = md.digest();
            }

            if (idx < chunkSHA256Hashes.length) {
                throw new IOException("File shrank while it was being hashed: " + file);
            }

            return chunkSHA256Hashes;
        } finally {
            if (fileStream != null) {
                try {
                    fileStream.close();
                } catch (IOException ioe) {
                    // Best effort: the hashes are already computed (or another
                    // exception is propagating), so only report the failure.
                    System.err.printf("Exception while closing %s.\n %s", file.getName(),
                            ioe.getMessage());
                }
            }
        }
    }

    /**
     * Reads from the stream until the buffer is full or the end of the stream
     * is reached. A single read call may return fewer bytes than requested.
     *
     * @param in
     *            the stream to read from
     * @param buff
     *            the buffer to fill, starting at index 0
     * @return the number of bytes placed in the buffer; 0 at end of stream
     * @throws IOException
     *             Thrown if reading from the stream fails
     */
    private static int readChunk(FileInputStream in, byte[] buff) throws IOException {
        int total = 0;
        while (total < buff.length) {
            int n = in.read(buff, total, buff.length - total);
            if (n < 0) {
                break;
            }
            total += n;
        }
        return total;
    }

    /**
     * Computes the SHA-256 tree hash for the passed array of 1 MB chunk
     * checksums.
     *
     * This method uses a pair of arrays to iteratively compute the tree hash
     * level by level. Each iteration takes two adjacent elements from the
     * previous level source array, computes the SHA-256 hash on their
     * concatenated value and places the result in the next level's destination
     * array. An unpaired last element is carried up to the next level
     * unchanged. At the end of an iteration, the destination array becomes the
     * source array for the next level.
     *
     * @param chunkSHA256Hashes
     *            An array of SHA-256 checksums; must contain at least one
     *            element
     * @return A byte[] containing the SHA-256 tree hash for the input chunks
     * @throws NoSuchAlgorithmException
     *             Thrown if SHA-256 MessageDigest can't be found
     * @throws IllegalArgumentException
     *             Thrown if the array is empty
     */
    public static byte[] computeSHA256TreeHash(byte[][] chunkSHA256Hashes)
            throws NoSuchAlgorithmException {
        if (chunkSHA256Hashes.length == 0) {
            throw new IllegalArgumentException("At least one chunk checksum is required");
        }

        MessageDigest md = MessageDigest.getInstance("SHA-256");

        byte[][] prevLvlHashes = chunkSHA256Hashes;

        while (prevLvlHashes.length > 1) {
            // Number of nodes on the next level: ceil(length / 2)
            int len = prevLvlHashes.length / 2;
            if (prevLvlHashes.length % 2 != 0) {
                len++;
            }

            byte[][] currLvlHashes = new byte[len][];

            int j = 0;
            for (int i = 0; i < prevLvlHashes.length; i = i + 2, j++) {
                // If there are at least two elements remaining
                if (prevLvlHashes.length - i > 1) {
                    // Calculate a digest of the concatenated nodes
                    md.reset();
                    md.update(prevLvlHashes[i]);
                    md.update(prevLvlHashes[i + 1]);
                    currLvlHashes[j] = md.digest();
                } else { // Take care of remaining odd chunk
                    currLvlHashes[j] = prevLvlHashes[i];
                }
            }

            prevLvlHashes = currLvlHashes;
        }

        return prevLvlHashes[0];
    }

    /**
     * Returns the hexadecimal representation of the input byte array
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @return A String containing two lower-case Hex characters per input byte
     */
    public static String toHex(byte[] data) {
        StringBuilder sb = new StringBuilder(data.length * 2);
        for (byte b : data) {
            // High nibble first, then low nibble; the table lookup needs no
            // zero padding and no locale-dependent case conversion.
            sb.append(HEX_DIGITS[(b >> 4) & 0x0F]);
            sb.append(HEX_DIGITS[b & 0x0F]);
        }
        return sb.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment