Skip to content

Instantly share code, notes, and snippets.

@TheGU
Created February 19, 2021 00:36
Show Gist options
  • Save TheGU/7d0433149e68479c7ab6b081ca6160c5 to your computer and use it in GitHub Desktop.
Save TheGU/7d0433149e68479c7ab6b081ca6160c5 to your computer and use it in GitHub Desktop.
Upload a file to AWS S3, then verify the stored content against its MD5 hash.
import os
import boto3
import botocore
import hashlib
def md5sum(filepath):
    """Return the hex MD5 digest of the file at *filepath*.

    Reads the file in chunks so arbitrarily large files are hashed
    without loading them fully into memory.
    """
    # Renamed from `hash`, which shadowed the builtin of the same name.
    digest = hashlib.md5()
    with open(filepath, "rb") as f:
        # 128 * md5 block_size (64) = 8192-byte reads per iteration.
        for chunk in iter(lambda: f.read(128 * digest.block_size), b""):
            digest.update(chunk)
    return digest.hexdigest()
def upload_file(s3_client, bucket=None, key=None, filepath=None, storage_class='STANDARD', retry=3):
    """Upload *filepath* to ``s3://bucket/key`` and verify the stored ETag
    against the local MD5 checksum, retrying the upload on mismatch.

    Parameters: `s3_client` is a boto3 S3 client; `retry` is the number of
    upload attempts before giving up.
    Returns the object URL ("endpoint/bucket/key") on success.
    Raises Exception on client errors, unexpected errors, or when all
    attempts end with a checksum mismatch.

    NOTE(review): an S3 ETag equals the plain MD5 only for single-part,
    non-SSE-KMS uploads -- confirm that holds for these files, otherwise
    the comparison can never succeed.
    """
    object_url = "{}/{}/{}".format(s3_client.meta.endpoint_url, bucket, key)
    attempts = retry
    while attempts > 0:
        attempts -= 1
        try:
            response = s3_client.upload_file(
                filepath,
                bucket,
                key,
                ExtraArgs={
                    'StorageClass': storage_class,
                    'Metadata': {
                        'namehash': os.path.basename(filepath),
                        'mode': 'encrypted'
                    }
                }
            )
            # boto3 upload_file returns None on success, so fetch the
            # stored ETag separately (S3 wraps it in literal quotes).
            etag = None
            if not response:
                etag = s3_client.head_object(Bucket=bucket, Key=key)['ETag'].strip('"')
        except botocore.exceptions.ClientError as exc:
            raise Exception('file uploaded client error') from exc
        except Exception as exc:
            raise Exception('file uploaded cannot fail') from exc
        if md5sum(filepath) == etag:
            return object_url
    # Original bug: the exhaustion branch was unreachable (retry <= 0
    # inside `while retry > 0`) and referenced an undefined `object_url`,
    # so a persistent mismatch silently returned None. Now it raises.
    raise Exception('file uploaded checksum mismatch {} after {} retry'.format(object_url, retry))
# Example usage. `config` (a configparser-style object), `source`,
# `max_pool` and `retry` are assumed to be defined earlier in the full
# script -- they are not declared in this snippet.
s3_client = boto3.client(
    's3',
    endpoint_url="https://{}".format(config.get(source, 'Endpoint')),
    aws_access_key_id=config.get(source, 'AccessKey'),
    aws_secret_access_key=config.get(source, 'SecretKey'),
    # NOTE(review): verify=False disables TLS certificate validation --
    # confirm this is intentional for the target endpoint.
    verify=False,
    config=botocore.config.Config(
        max_pool_connections=max_pool,
        retries={'max_attempts': retry, 'mode': 'standard'})
)
# Original bug: this call ended with a trailing ':' -- a SyntaxError.
upload_file(s3_client, bucket="TEST", key="FILE", filepath="LOCALFILE",
            storage_class='STANDARD', retry=3)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment