pv <filename> | split --bytes=134217728 --verbose -a 5 - part
Created
January 21, 2018 14:35
-
-
Save maerlyn/24701f1813fd5fd4a641c32467912bf8 to your computer and use it in GitHub Desktop.
glacier multipart upload
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Multipart upload of a large archive to AWS Glacier.
#
# Usage: ./glacier-upload.sh <archive-file>
# The archive must already be split into $byteSize-byte chunks named "part*",
# e.g.:  pv <archive> | split --bytes=134217728 -a 5 - part
#
# Dependencies: jq, parallel, awscli
#   sudo dnf install jq
#   sudo dnf install parallel
#   sudo pip install awscli

# Part size in bytes; must match the size given to `split` (128 MiB here).
byteSize=134217728
vaultName='digicam'

# The original archive file is required to compute the total size and checksum.
if [ -z "$1" ] || [ ! -f "$1" ]; then
    echo "Usage: $0 <archive-file>" >&2
    exit 1
fi

# Count the number of part files that begin with "part".
fileCount=$(ls -1 | grep -c "^part")
echo "Total parts to upload: " $fileCount

# Get the list of part files to upload. Edit this if you chose a different
# prefix in the split command.
files=$(ls | grep "^part")

# Initiate the multipart upload connection to Glacier; the JSON response
# contains the uploadId used by every subsequent call.
init=$(aws glacier initiate-multipart-upload --account-id - --part-size $byteSize --vault-name $vaultName --archive-description '2015-06-04-06-Csapatepito.tar.bz2')
echo "---------------------------------------"

# jq -r emits the raw string, so no quote-stripping via xargs is needed.
uploadId=$(echo $init | jq -r '.uploadId')

# Create (or truncate) the temp file that stores one upload command per part.
> commands.txt

# Total size in bytes of the archive.
# stat is robust here; parsing `ls -l` with `cut -d ' '` breaks whenever the
# listing's column widths shift.
archivesize=$(stat -c%s "$1")

# Create the upload commands and store them in commands.txt.
i=0
for f in $files
do
    filesize=$(stat -c%s "$f")
    echo 'filesize ' $filesize
    byteStart=$((i*byteSize))
    byteEnd=$((i*byteSize+byteSize-1))
    # The last part is usually shorter than byteSize: clamp its end offset
    # to the archive size so the Content-Range stays valid.
    if [ "$byteEnd" -gt "$archivesize" ]; then
        byteEnd=$((archivesize-1))
    fi
    echo aws glacier upload-multipart-part --body $f --range "'"'bytes '"$byteStart"'-'"$byteEnd"'/'"$archivesize""'" --account-id - --vault-name $vaultName --upload-id $uploadId >> commands.txt
    i=$(($i+1))
done

# Run the upload commands from commands.txt.
# -j 1 uploads one part at a time; raise it to upload parts concurrently.
# -a commands.txt runs every line of that file, in potentially random order.
# --no-notice suppresses the citation notice on the console.
# --bar provides a command line progress bar.
parallel -j 1 -a commands.txt --no-notice --bar

echo "List Active Multipart Uploads:"
echo "Verify that a connection is open:"
aws glacier list-multipart-uploads --account-id - --vault-name $vaultName

# Compute the SHA-256 tree hash Glacier requires to complete the upload.
# TreeHashExample prints "SHA-256 Tree Hash = <hex>"; field 5 is the hex value.
checksum=`java TreeHashExample $1 | cut -d ' ' -f 5`

# End the multipart upload.
result=`aws glacier complete-multipart-upload --account-id - --vault-name $vaultName --upload-id $uploadId --archive-size $archivesize --checksum $checksum`

# Store the JSON response from Amazon for record keeping.
touch result.json
echo $result >> result.json

# List open multipart connections to verify ours is now closed.
echo "------------------------------"
echo "List Active Multipart Uploads:"
echo "Verify that the connection is closed:"
aws glacier list-multipart-uploads --account-id - --vault-name $vaultName

#echo "-------------"
#echo "Contents of commands.txt"
#cat commands.txt
#echo "--------------"
#echo "Deleting temporary commands.txt file"
#rm commands.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.security.MessageDigest; | |
import java.security.NoSuchAlgorithmException; | |
public class TreeHashExample {

    static final int ONE_MB = 1024 * 1024;

    /**
     * Computes and prints the hex representation of the SHA-256 tree hash for
     * the specified file, as required by the Glacier complete-multipart-upload
     * call.
     *
     * @param args
     *            args[0]: a file to compute a SHA-256 tree hash for
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Missing required filename argument");
            System.exit(-1);
        }
        File inputFile = new File(args[0]);
        try {
            byte[] treeHash = computeSHA256TreeHash(inputFile);
            System.out.printf("SHA-256 Tree Hash = %s\n", toHex(treeHash));
        } catch (IOException ioe) {
            System.err.format("Exception when reading from file %s: %s", inputFile,
                    ioe.getMessage());
            System.exit(-1);
        } catch (NoSuchAlgorithmException nsae) {
            System.err.format("Cannot locate MessageDigest algorithm for SHA-256: %s",
                    nsae.getMessage());
            System.exit(-1);
        }
    }

    /**
     * Computes the SHA-256 tree hash for the given file.
     *
     * @param inputFile
     *            a File to compute the SHA-256 tree hash for
     * @return a byte[] containing the SHA-256 tree hash
     * @throws IOException
     *             Thrown if there's an issue reading the input file
     * @throws NoSuchAlgorithmException
     *             Thrown if SHA-256 MessageDigest can't be found
     */
    public static byte[] computeSHA256TreeHash(File inputFile) throws IOException,
            NoSuchAlgorithmException {
        byte[][] chunkSHA256Hashes = getChunkSHA256Hashes(inputFile);
        return computeSHA256TreeHash(chunkSHA256Hashes);
    }

    /**
     * Computes a SHA-256 checksum for each 1 MB chunk of the input file. This
     * includes the checksum for the last chunk even if it is smaller than 1 MB.
     *
     * @param file
     *            A file to compute checksums on
     * @return a byte[][] containing the checksums of each 1 MB chunk
     * @throws IOException
     *             Thrown if there's an IOException when reading the file
     * @throws NoSuchAlgorithmException
     *             Thrown if SHA-256 MessageDigest can't be found
     */
    public static byte[][] getChunkSHA256Hashes(File file) throws IOException,
            NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        long numChunks = file.length() / ONE_MB;
        if (file.length() % ONE_MB > 0) {
            numChunks++;
        }
        if (numChunks == 0) {
            // Empty file: its tree hash is the digest of zero bytes.
            return new byte[][] { md.digest() };
        }
        byte[][] chunkSHA256Hashes = new byte[(int) numChunks][];
        // try-with-resources closes the stream on every path; the previous
        // manual try/finally swallowed close() failures with only a printout.
        try (FileInputStream fileStream = new FileInputStream(file)) {
            byte[] buff = new byte[ONE_MB];
            int bytesRead;
            int idx = 0;
            while ((bytesRead = fileStream.read(buff, 0, ONE_MB)) > 0) {
                md.reset();
                md.update(buff, 0, bytesRead);
                chunkSHA256Hashes[idx++] = md.digest();
            }
        }
        return chunkSHA256Hashes;
    }

    /**
     * Computes the SHA-256 tree hash for the passed array of 1 MB chunk
     * checksums.
     *
     * This method uses a pair of arrays to iteratively compute the tree hash
     * level by level. Each iteration takes two adjacent elements from the
     * previous level source array, computes the SHA-256 hash on their
     * concatenated value and places the result in the next level's destination
     * array. At the end of an iteration, the destination array becomes the
     * source array for the next level.
     *
     * @param chunkSHA256Hashes
     *            An array of SHA-256 checksums
     * @return A byte[] containing the SHA-256 tree hash for the input chunks
     * @throws NoSuchAlgorithmException
     *             Thrown if SHA-256 MessageDigest can't be found
     */
    public static byte[] computeSHA256TreeHash(byte[][] chunkSHA256Hashes)
            throws NoSuchAlgorithmException {
        MessageDigest md = MessageDigest.getInstance("SHA-256");
        byte[][] prevLvlHashes = chunkSHA256Hashes;
        while (prevLvlHashes.length > 1) {
            int len = prevLvlHashes.length / 2;
            if (prevLvlHashes.length % 2 != 0) {
                len++;
            }
            byte[][] currLvlHashes = new byte[len][];
            int j = 0;
            for (int i = 0; i < prevLvlHashes.length; i = i + 2, j++) {
                // If there are at least two elements remaining
                if (prevLvlHashes.length - i > 1) {
                    // Calculate a digest of the concatenated nodes
                    md.reset();
                    md.update(prevLvlHashes[i]);
                    md.update(prevLvlHashes[i + 1]);
                    currLvlHashes[j] = md.digest();
                } else { // Take care of remaining odd chunk
                    currLvlHashes[j] = prevLvlHashes[i];
                }
            }
            prevLvlHashes = currLvlHashes;
        }
        return prevLvlHashes[0];
    }

    /**
     * Returns the lowercase hexadecimal representation of the input byte array.
     *
     * @param data
     *            a byte[] to convert to Hex characters
     * @return A String containing Hex characters
     */
    public static String toHex(byte[] data) {
        StringBuilder sb = new StringBuilder(data.length * 2);
        for (byte b : data) {
            // %02x zero-pads and is already lowercase, so no separate
            // leading-zero handling or toLowerCase() pass is needed.
            sb.append(String.format("%02x", b));
        }
        return sb.toString();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment