Created
December 21, 2021 06:01
-
-
Save jgru/f12a2f3293c3a99920cad76c6c426e36 to your computer and use it in GitHub Desktop.
Helper script to upload `STDIN` directly to AWS S3 storage by utilizing `boto`
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Perform a multipart upload to Amazon S3 of data read from stdin. | |
# | |
# Example usage: | |
# tar -C / -cpjO /home | toS3 -k aws.key -b com-example-backup -o home.tar.bz2 | |
# | |
# Originally authored from | |
# https://www.vennedey.net/blog/1-Pipe-data-from-STDIN-to-Amazon-S3 | |
# Adapted to python3 | |
import boto | |
import sys | |
import optparse | |
import time | |
import traceback | |
import hashlib | |
import math | |
from io import StringIO | |
def upload(options):
    """Stream stdin to S3 as a multipart upload and verify the resulting ETag.

    Stdin is read in binary mode in chunks of ``options.size`` bytes; every
    chunk becomes one part of the multipart upload. A failed part is retried
    indefinitely, waiting ``options.time`` seconds between attempts.

    Args:
        options: Parsed command-line options. The attributes read here are
            ``aws_key``, ``aws_secret``, ``bucket``, ``object``, ``size``,
            ``time`` and ``debug``.

    Exits the process through ``sys.exit`` with status 4 (connection failed),
    5 (bucket not available), 6 (object already exists), 7 (multipart upload
    could not be initiated) or 8 (completion or checksum comparison failed).
    """
    # Local import: parts are raw bytes, so they must be wrapped in BytesIO
    # (the module-level StringIO import only handles text).
    from io import BytesIO

    # Establish connection to S3
    try:
        s3_connection = boto.connect_s3(options.aws_key, options.aws_secret)
    except Exception:
        print("Error: Connection to S3 could not be established.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(4)

    # Check if the bucket is available
    try:
        s3_bucket = s3_connection.lookup(options.bucket)
        if not s3_bucket:
            raise Exception("BucketLookupFailed")
    except Exception:
        print(f"Error: Bucket {options.bucket} is not available.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(5)

    # Refuse to overwrite an existing object.
    if s3_bucket.get_key(options.object):
        print(f"Error: Object {options.object} exists in bucket {options.bucket}")
        sys.exit(6)

    # Initiate the upload
    try:
        s3_upload = s3_bucket.initiate_multipart_upload(options.object)
    except Exception:
        print("Error: Multipart upload could not be initiated.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(7)

    # Read options.size bytes from stdin and upload them as one part each.
    # The md5sum of each part is calculated, and the md5sum of the
    # concatenated part digests is accumulated on the way, so the file's
    # integrity can be verified after the upload by comparing the locally
    # calculated checksum with the eTag of the uploaded object.
    part_number = 0
    upload_hash = hashlib.md5()
    # Column width for the per-part byte count in the progress output.
    size_width = len(str(options.size))
    while True:
        # sys.stdin.buffer yields bytes; text-mode stdin would hand str to
        # hashlib (TypeError) and could not carry arbitrary binary data.
        chunk = sys.stdin.buffer.read(options.size)
        if not chunk:
            print("Reached end of inputstream.")
            break
        part_number += 1
        part_hash = hashlib.md5(chunk)
        upload_hash.update(part_hash.digest())

        # If upload fails, try again.
        attempt = 0
        while True:
            attempt += 1
            print(
                f"Upload part {part_number:10} - "
                f"{len(chunk):{size_width}} Bytes "
                f"- {part_hash.hexdigest()} - Try {attempt}"
            )
            try:
                # A fresh buffer per attempt so every retry starts at offset 0.
                s3_upload.upload_part_from_file(BytesIO(chunk), part_number)
                break
            except Exception:
                # Deliberately not a bare except: KeyboardInterrupt and
                # SystemExit must be able to break out of the retry loop.
                print(f"Error uploading part. Try again in {options.time} seconds...")
                if options.debug:
                    print(traceback.format_exc())
                time.sleep(options.time)

    # Complete upload and check integrity
    try:
        s3_upload.complete_upload()
        # Compare the eTag of the uploaded object with the locally calculated
        # md5sum ("<md5 of part digests>-<number of parts>").
        checksum_remote = s3_bucket.get_key(options.object).etag.strip('"')
        checksum_local = f"{upload_hash.hexdigest()}-{part_number}"
        if checksum_remote != checksum_local:
            print("Error: Local checksum differs from object's eTag:")
            print(f"Local : {checksum_local}")
            print(f"Remote: {checksum_remote}")
            print("The upload might be corrupt.")
            raise Exception("ChecksumMismatch")
        print("Upload completed")
    except Exception:
        print("Error: Error while completing upload.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(8)
def main(argv):
    """Parse the command line, load the AWS credentials and start the upload.

    Args:
        argv: Full argument vector including the program name at index 0
            (typically ``sys.argv``). When ``None``, optparse falls back to
            ``sys.argv[1:]``.

    Exits the process through ``sys.exit`` with status 1 (a required option is
    missing), 2 (keyfile not readable) or 3 (keyfile content is not of the
    form ``<AWS_KEY>:<AWS_SECRET>``). On success the parsed options, extended
    with ``aws_key`` and ``aws_secret``, are handed to ``upload``.
    """
    parser = optparse.OptionParser()
    parser.set_usage(
        "%prog -k AWS_KEY_FILE -b BUCKET_NAME -o OBJECT_NAME "
        "[-s CHUNK_SIZE] [-t SECONDS] [-d]"
    )
    parser.add_option(
        "-k",
        "--keyfile",
        dest="keyfile",
        metavar="AWS_KEY_FILE",
        help="File that contains: <AWS_KEY>:<AWS_SECRET> for S3 access.",
    )
    parser.add_option(
        "-b",
        "--bucket",
        dest="bucket",
        metavar="BUCKET_NAME",
        help="Name of the target bucket.",
    )
    parser.add_option(
        "-o",
        "--object",
        dest="object",
        metavar="OBJECT_NAME",
        help="Name of the target object.",
    )
    parser.add_option(
        "-s",
        "--size",
        type="int",
        dest="size",
        metavar="CHUNK_SIZE",
        default=1024 * 1024 * 5,
        help="Split upload in CHUNK_SIZE bytes. Default is 5 MiB.",
    )
    parser.add_option(
        "-t",
        "--time",
        type="int",
        dest="time",
        metavar="SECONDS",
        default=5,
        help="Time in seconds to wait until retry upload a "
        "failed part again. Default is 5.",
    )
    parser.add_option(
        "-d",
        "--debug",
        action="store_true",
        dest="debug",
        default=False,
        help="Print debug information",
    )

    # Honour the argv that was passed in instead of silently re-reading
    # sys.argv; index 0 is the program name and is not an option.
    cli_args = argv[1:] if argv is not None else None
    options, _ = parser.parse_args(cli_args)

    if not (options.keyfile and options.bucket and options.object):
        parser.print_help()
        sys.exit(1)

    try:
        with open(options.keyfile, "r") as f:
            # Tuple unpacking raises ValueError unless the file holds exactly
            # one ':' separating key and secret.
            options.aws_key, options.aws_secret = f.read().strip(" \t\n\r").split(":")
    except IOError:
        print("Please provide readable keyfile with AWS credentials.")
        sys.exit(2)
    except ValueError:
        print(
            "Please provide AWS credentials <AWS_KEY>:<AWS_SECRET> "
            f"in keyfile {options.keyfile}."
        )
        sys.exit(3)

    upload(options)
# Script entry point: hand the raw command line to main() when executed directly.
if "__main__" == __name__:
    main(sys.argv)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment