AWS S3 file ETag
import binascii
import hashlib
import os

# Maximum object size in bytes before the client switches to a multipart upload
AWS_UPLOAD_MAX_SIZE = 20 * 1024 * 1024
# Part size used for multipart uploads.
# Make sure this matches the part size your upload client uses,
# otherwise the computed ETag will not match (see the boto3 sketch below).
# Be careful: botocore's put_object cannot do multipart uploads:
# https://stackoverflow.com/questions/38442512/difference-between-upload-and-putobject-for-uploading-a-file-to-s3
AWS_UPLOAD_PART_SIZE = 8 * 1024 * 1024


def s3_etag(f):
    """Compute the ETag that S3 would assign to a file.

    Source: https://stackoverflow.com/questions/6591047/etag-definition-changed-in-amazon-s3/28877788#28877788

    Usage:
        with open('./myfile.txt', 'rb') as f:
            etag = s3_etag(f)

    :param f: a file object opened in binary mode
    :return: MD5-based hash that will match the ETag reported by S3
    """
    # Determine the file size to decide between the single-part
    # and multipart ETag formats.
    f.seek(0, os.SEEK_END)
    filesize = f.tell()
    f.seek(0)

    if filesize > AWS_UPLOAD_MAX_SIZE:
        # Multipart upload: the ETag is the MD5 of the concatenated
        # per-part MD5 digests, followed by "-<number of parts>".
        block_count = 0
        md5string = b''
        for block in iter(lambda: f.read(AWS_UPLOAD_PART_SIZE), b''):
            h = hashlib.md5()
            h.update(block)
            md5string += binascii.unhexlify(h.hexdigest())
            block_count += 1
        h = hashlib.md5()
        h.update(md5string)
        return h.hexdigest() + "-" + str(block_count)
    else:
        # Single-part upload: the ETag is simply the MD5 of the file contents.
        h = hashlib.md5()
        for block in iter(lambda: f.read(AWS_UPLOAD_PART_SIZE), b''):
            h.update(block)
        return h.hexdigest()
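
To keep the upload client's multipart settings consistent with the constants above, one option is to pass a boto3 TransferConfig whose threshold and chunk size reuse AWS_UPLOAD_MAX_SIZE and AWS_UPLOAD_PART_SIZE. This is a minimal sketch, not part of the original gist; the bucket and file names are placeholders.

    import boto3
    from boto3.s3.transfer import TransferConfig

    # Reuse the same limits as the ETag computation so the part boundaries line up.
    config = TransferConfig(
        multipart_threshold=AWS_UPLOAD_MAX_SIZE,
        multipart_chunksize=AWS_UPLOAD_PART_SIZE,
    )

    s3 = boto3.client('s3')
    # upload_file switches to a multipart upload automatically once the file
    # exceeds multipart_threshold.
    s3.upload_file('./myfile.txt', 'my-bucket', 'myfile.txt', Config=config)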
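
As a sanity check, the locally computed value can be compared against the ETag S3 reports for the uploaded object. The sketch below assumes boto3 and placeholder bucket/key names; note that S3 returns the ETag wrapped in double quotes.

    import boto3

    s3 = boto3.client('s3')

    with open('./myfile.txt', 'rb') as f:
        local_etag = s3_etag(f)

    # Strip the surrounding double quotes before comparing.
    remote_etag = s3.head_object(Bucket='my-bucket', Key='myfile.txt')['ETag'].strip('"')

    print('match' if local_etag == remote_etag else 'mismatch')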