Skip to content

Instantly share code, notes, and snippets.

@CMCDragonkai
Last active September 14, 2023 07:22
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CMCDragonkai/9e0c4758b6ce111131a23dcb38bbb1eb to your computer and use it in GitHub Desktop.
Save CMCDragonkai/9e0c4758b6ce111131a23dcb38bbb1eb to your computer and use it in GitHub Desktop.
Calculate the S3 ETag of a file, including multipart uploads #python #aws
#!/usr/bin/env python3
import argparse
import hashlib
# see https://docs.aws.amazon.com/cli/latest/topic/s3-config.html
# for default multipart_threshold and multipart_chunksize
def md5sum(
    file_like, multipart_threshold=8 * 1024 * 1024, multipart_chunksize=8 * 1024 * 1024
):
    """Return the S3-style ETag string for the contents of ``file_like``.

    The stream is rewound to offset 0, read to the end in pieces of
    ``multipart_chunksize`` bytes, and rewound to offset 0 again before
    returning.

    Args:
        file_like: Seekable binary stream whose ``read(n)`` returns ``bytes``.
        multipart_threshold: Content longer than this many bytes gets the
            multipart form of the ETag.
        multipart_chunksize: Number of bytes per part (and per read).

    Returns:
        The hex MD5 of the whole content when its size does not exceed
        ``multipart_threshold``. Otherwise the hex MD5 of the concatenated
        binary MD5 digests of every part, followed by ``"-"`` and the number
        of parts.
    """
    file_like.seek(0)
    whole_hash = hashlib.md5()
    part_digests = []
    filesize = 0
    for block in iter(lambda: file_like.read(multipart_chunksize), b""):
        part_digests.append(hashlib.md5(block).digest())
        filesize += len(block)
        # The whole-content digest is only needed while the stream still fits
        # under the threshold; once it is exceeded the multipart form wins,
        # so later blocks need not be hashed twice.
        if filesize <= multipart_threshold:
            whole_hash.update(block)
    file_like.seek(0)

    # NOTE(review): content of exactly ``multipart_threshold`` bytes is
    # treated as a single-part (plain MD5) upload here; confirm this boundary
    # matches the uploader in use.
    if filesize > multipart_threshold:
        combined = hashlib.md5(b"".join(part_digests))
        return "{}-{}".format(combined.hexdigest(), len(part_digests))

    # Hash of the full content, not just the last block read, so the result
    # is correct even when the chunk size is smaller than the threshold.
    return whole_hash.hexdigest()
def main():
    """Print the ETag and path, tab-separated, for each file named on the command line."""
    parser = argparse.ArgumentParser()
    parser.add_argument("file", type=str, nargs="+")
    paths = parser.parse_args().file
    for path in paths:
        # Binary mode: the digest is computed over the raw bytes of the file.
        with open(path, mode="rb") as handle:
            etag = md5sum(handle)
        print("{}\t{}".format(etag, path))
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment