Skip to content

Instantly share code, notes, and snippets.

@jgru
Created December 21, 2021 06:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jgru/f12a2f3293c3a99920cad76c6c426e36 to your computer and use it in GitHub Desktop.
Save jgru/f12a2f3293c3a99920cad76c6c426e36 to your computer and use it in GitHub Desktop.
Helper script to upload data from `STDIN` directly to AWS S3 storage using `boto`
#!/usr/bin/env python3
# Perform a multipart upload to Amazon S3 of data read from stdin.
#
# Example usage:
# tar -C / -cpjO /home | toS3 -k aws.key -b com-example-backup -o home.tar.bz2
#
# Originally authored from
# https://www.vennedey.net/blog/1-Pipe-data-from-STDIN-to-Amazon-S3
# Adapted to python3
import boto
import sys
import optparse
import time
import traceback
import hashlib
import math
from io import StringIO
def upload(options):
    """Stream stdin to S3 as a multipart upload and verify its integrity.

    Reads binary chunks of ``options.size`` bytes from stdin, uploads each
    chunk as one part (retrying failed parts every ``options.time`` seconds),
    then compares the uploaded object's eTag against the locally computed
    multipart checksum (md5 of the concatenated per-part md5 digests,
    suffixed with ``-<part count>``).

    Exits the process with a distinct status code for each failure mode:
    4 connection, 5 bucket lookup, 6 object already exists, 7 initiation,
    8 completion/checksum mismatch.
    """
    # Parts must be binary file-like objects; the original used StringIO,
    # which cannot hold the bytes read from sys.stdin.buffer.
    from io import BytesIO

    # Establish connection to S3.
    try:
        s3_connection = boto.connect_s3(options.aws_key, options.aws_secret)
    except Exception:
        print("Error: Connection to S3 could not be established.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(4)

    # Check that the bucket is available.
    try:
        s3_bucket = s3_connection.lookup(options.bucket)
        if not s3_bucket:
            # lookup() returns None for a missing/inaccessible bucket.
            raise Exception("BucketLookupFailed")
    except Exception:
        print(f"Error: Bucket {options.bucket} is not available.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(5)

    # Refuse to overwrite an existing object.
    if s3_bucket.get_key(options.object):
        print(f"Error: Object {options.object} exists in bucket {options.bucket}")
        sys.exit(6)

    # Initiate the multipart upload.
    try:
        s3_upload = s3_bucket.initiate_multipart_upload(options.object)
    except Exception:
        print("Error: Multipart upload could not be initiated.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(7)

    # Read options.size bytes at a time from stdin and upload each chunk as
    # one part. The md5sum of each part is calculated, and the md5sum of the
    # concatenated digests is accumulated on the way, so the object's
    # integrity can be verified after upload by comparing it with the eTag.
    uploadPart = 0
    uploadHash = hashlib.md5()
    # Field width for the byte-count column of the progress output.
    printByteLength = str(math.ceil(math.log10(options.size)))
    while True:
        # Read raw bytes: text-mode sys.stdin.read() would return str,
        # which hashlib.md5 and the part upload cannot accept in Python 3.
        chunk = sys.stdin.buffer.read(options.size)
        if not chunk:
            print("Reached end of inputstream.")
            break
        uploadPart += 1
        uploadPartHash = hashlib.md5(chunk)
        uploadHash.update(uploadPartHash.digest())
        # If the upload of this part fails, retry until it succeeds.
        uploadPartTry = 0
        while True:
            try:
                uploadPartTry += 1
                print(
                    f"Upload part {uploadPart:10} - "
                    f"{len(chunk):{printByteLength}} Bytes "
                    f"- {uploadPartHash.hexdigest()} - Try {uploadPartTry}"
                )
                s3_upload.upload_part_from_file(BytesIO(chunk), uploadPart)
                break
            except Exception:
                print(f"Error uploading part. Try again in {options.time} seconds...")
                if options.debug:
                    print(traceback.format_exc())
                time.sleep(options.time)

    # Complete the upload and check integrity.
    try:
        s3_upload.complete_upload()
        # Compare the eTag of the uploaded object with the locally
        # calculated multipart checksum.
        checksum_remote = s3_bucket.get_key(options.object).etag.strip('"')
        checksum_local = uploadHash.hexdigest() + "-" + str(uploadPart)
        if checksum_remote != checksum_local:
            print("Error: Local checksum differs from object's eTag:")
            print(f"Local : {checksum_local}")
            print(f"Remote: {checksum_remote}")
            print("The upload might be corrupt.")
            raise Exception("ChecksumMismatch")
        print("Upload completed")
    except Exception:
        print("Error: Error while completing upload.")
        if options.debug:
            print(traceback.format_exc())
        sys.exit(8)
def main(argv):
    """Parse command-line options, load AWS credentials, start the upload.

    Required options: -k keyfile (containing "<AWS_KEY>:<AWS_SECRET>"),
    -b bucket name, -o object name. Exit codes: 1 missing required options,
    2 unreadable keyfile, 3 malformed keyfile; upload() produces the rest.
    """
    parser = optparse.OptionParser()
    parser.set_usage(
        "%prog -k AWS_KEY_FILE -b BUCKET_NAME -o OBJECT_NAME"
        " [-s CHUNK_SIZE] [-t SECONDS]"
    )
    parser.add_option(
        "-k",
        "--keyfile",
        dest="keyfile",
        metavar="AWS_KEY_FILE",
        help="File that contains: <AWS_KEY>:<AWS_SECRET>" " for S3 access.",
    )
    parser.add_option(
        "-b",
        "--bucket",
        dest="bucket",
        metavar="BUCKET_NAME",
        help="Name of the target bucket.",
    )
    parser.add_option(
        "-o",
        "--object",
        dest="object",
        metavar="OBJECT_NAME",
        help="Name of the target object.",
    )
    parser.add_option(
        "-s",
        "--size",
        type="int",
        dest="size",
        metavar="CHUNK_SIZE",
        default=1024 * 1024 * 5,  # S3 multipart parts must be >= 5 MiB
        help="Split upload in CHUNK_SIZE bytes." " Default is 5 MiB.",
    )
    parser.add_option(
        "-t",
        "--time",
        type="int",
        dest="time",
        metavar="SECONDS",
        default=5,
        help="Time in seconds to wait until retry upload a "
        "failed part again. Default is 5.",
    )
    parser.add_option(
        "-d",
        "--debug",
        action="store_true",
        dest="debug",
        default=False,
        help="Print debug information",
    )
    options, args = parser.parse_args()

    # All three of keyfile, bucket and object are mandatory.
    if not (options.keyfile and options.bucket and options.object):
        parser.print_help()
        sys.exit(1)

    # Read "<AWS_KEY>:<AWS_SECRET>" from the keyfile and stash the parts
    # on the options object for upload() to use.
    try:
        with open(options.keyfile, "r") as f:
            options.aws_key, options.aws_secret = f.read().strip(" \t\n\r").split(":")
    except IOError:
        print("Please provide readable keyfile with AWS credentials.")
        sys.exit(2)
    except ValueError:
        print(
            "Please provide AWS credentials <AWS_KEY>:<AWS_SECRET> "
            f"in keyfile {options.keyfile}."
        )
        sys.exit(3)
    upload(options)
# Entry point: run only when executed as a script, not when imported.
if __name__ == "__main__":
    main(sys.argv)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment