@poliquin
Created August 31, 2021 20:01
Simple command line tool for computing the Amazon S3 ETag of local files: the plain MD5 for single-part objects, and the MD5 of the concatenated part digests with a "-N" part-count suffix for multipart uploads.
"""
Calculate Amazon S3 ETag for one or more files.

Example:

    python e3tag.py files/1.txt files/2.txt files/3.txt

"""
import hashlib
def calculate_s3_etag(file_path, chunk_size=8 * 1024 * 1024):
    """Calculate the Amazon S3 ETag hash for the given file."""
    # https://stackoverflow.com/a/43819225/892534
    md5s = []

    with open(file_path, 'rb') as fp:
        while True:
            data = fp.read(chunk_size)
            if not data:
                break
            md5s.append(hashlib.md5(data))

    if len(md5s) < 1:
        # Empty file: the ETag is the MD5 of zero bytes.
        return '"{}"'.format(hashlib.md5().hexdigest())

    if len(md5s) == 1:
        # Single-part upload: the ETag is just the file's MD5.
        return '"{}"'.format(md5s[0].hexdigest())

    # Multipart upload: MD5 of the concatenated part digests, plus part count.
    digests = b''.join(m.digest() for m in md5s)
    digests_md5 = hashlib.md5(digests)
    return '"{}-{}"'.format(digests_md5.hexdigest(), len(md5s))
if __name__ == '__main__':
    import argparse
    import sys

    argp = argparse.ArgumentParser(description='Calculate S3 ETag for file')
    argp.add_argument('filepath', nargs='*', help='Files to hash')
    argp.add_argument(
        '-k', '--chunk-size', type=int, default=8 * 1024 * 1024,
        help='Multipart chunk size in bytes (default 8 MiB)'
    )
    argp.add_argument(
        '-r', '--reverse', action='store_true', help='Show file path first'
    )
    opts = argp.parse_args()

    for fpath in opts.filepath:
        etag = calculate_s3_etag(fpath, opts.chunk_size)
        if opts.reverse:
            sys.stdout.write(f'{fpath} {etag}\n')
        else:
            sys.stdout.write(f'{etag} {fpath}\n')
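
The locally computed ETag can be checked against the one S3 reports for an uploaded object. Below is a minimal sketch of that comparison, assuming boto3 is installed, that the script is saved as e3tag.py (as in the docstring example), and using a hypothetical bucket and key; head_object returns the ETag with surrounding double quotes, which matches the format produced above. The values only agree if the object was uploaded with the same part size used here (8 MiB by default).

import boto3

from e3tag import calculate_s3_etag  # module name assumed from the docstring example

s3 = boto3.client('s3')

# Hypothetical bucket/key/path; substitute an object you have uploaded.
bucket, key, local_path = 'my-bucket', 'files/1.txt', 'files/1.txt'

remote_etag = s3.head_object(Bucket=bucket, Key=key)['ETag']
local_etag = calculate_s3_etag(local_path)

print('match' if remote_etag == local_etag else 'mismatch', remote_etag, local_etag)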