Skip to content

Instantly share code, notes, and snippets.

@tomislacker
Created January 3, 2018 17:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomislacker/888d8c56e0522adecde81aff9cb6565f to your computer and use it in GitHub Desktop.
Save tomislacker/888d8c56e0522adecde81aff9cb6565f to your computer and use it in GitHub Desktop.
S3 Multipart Upload ETag Calculator
#!/usr/bin/env python
"""
USAGE: etag.py {file} [chunk size in bytes]
Calculates the S3 ETag of a multi-part upload
$ ./etag.py Downloads/c2560cf.zip
Downloads/c2560cf.zip 56daa8b3846a358207edc89e9bb90309-6
"""
from __future__ import print_function
import math
import hashlib
import sys
from filechunkio import FileChunkIO # pip install filechunkio
from multiprocessing import Pool
# Part size, in bytes, used when no chunk size is passed on the command line:
# 8 * 1024**2 bytes (8 MiB).
DEFAULT_CHUNK_SIZE = 8 * 1024 ** 2
"""Default chunk size is 8MB"""
def get_etag(src_file, chunk_size):
    """
    Compute the multipart-upload-style ETag string for a local file.

    The file is read in consecutive parts of ``chunk_size`` bytes. Each part
    is MD5-hashed, the raw (binary) part digests are concatenated in order
    and MD5-hashed again, and the number of parts is appended after a dash.

    :param src_file: Path of the file to hash; it is opened in binary mode.
    :param chunk_size: Part size in bytes. Must be at least 1.
    :returns: ``"<hex md5 of the concatenated part digests>-<part count>"``.
        An empty file has zero parts, so the result is the MD5 of empty
        input followed by ``-0``.
    :raises ValueError: If ``chunk_size`` is less than 1.

    Errors raised while opening or reading ``src_file`` propagate unchanged.
    """
    # read(0) returns b"" immediately (a bogus "-0" result for a non-empty
    # file) and a negative size reads the whole file as a single part, so
    # reject both instead of silently producing a misleading ETag.
    if chunk_size < 1:
        raise ValueError(
            "chunk_size must be a positive number of bytes, got %r"
            % (chunk_size,)
        )

    # Keep only the 16-byte digests; the hash objects are not needed later.
    part_digests = []
    with open(src_file, 'rb') as fp:
        while True:
            data = fp.read(chunk_size)
            if not data:
                break
            part_digests.append(hashlib.md5(data).digest())

    combined = hashlib.md5(b"".join(part_digests))
    return "%s-%s" % (combined.hexdigest(), len(part_digests))
if __name__ == '__main__':
    # Command-line entry point: etag.py {file} [chunk size in bytes]
    # Without a file argument, sys.argv[1] would raise a bare IndexError;
    # print the usage line to stderr and exit with a non-zero status instead.
    if len(sys.argv) < 2:
        sys.exit("USAGE: etag.py {file} [chunk size in bytes]")

    src_file = sys.argv[1]

    # The optional second argument overrides the default part size; int()
    # raises ValueError if it is not a valid integer.
    use_chunk = DEFAULT_CHUNK_SIZE
    if len(sys.argv) == 3:
        use_chunk = int(sys.argv[2])

    # Output format: "<file> <etag>"
    print("%s %s" % (
        src_file,
        get_etag(src_file, use_chunk),
    ))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment