Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
MD5 hash calculator for multi-part file uploads to S3 / object storage.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
@author Jay E. Taylor <outtatime@gmail.com>
@date 2019-06-03
@description MD5 hash calculator for multi-part file uploads to S3 / object storage.
Also see the Go version: https://gist.github.com/jaytaylor/57799723734dd90e3a8510e0de1ba38f
Based on: https://gist.github.com/itemir/f5bc9fded6483cd79c89ebf4ca1cfd30
"""
import argparse
import hashlib
import logging
import sys
def parse_flags(args):
    """Parse command-line arguments and configure the root logger.

    :param args: List of argument strings, excluding the program name.
    :return: ``argparse.Namespace`` with ``filename``, ``part_size``,
        ``base64`` and ``verbose`` attributes.

    As a side effect, ``logging.basicConfig`` is called with level DEBUG
    when ``--verbose`` was given and INFO otherwise.
    """
    # (names, keyword settings) for each argument, in registration order.
    option_specs = (
        (('filename',), {
            'help': 'File to calculate chunked MD5 sum for',
        }),
        (('-s', '--part-size'), {
            'type': int,
            'default': 1073741824,
            'help': 'Segment size for individual parts, in bytes (default=1073741824, i.e. 1GB)',
        }),
        (('-b', '--base64'), {
            'action': 'store_true',
            'help': 'Display in base64 instead of hexadecimal',
        }),
        (('-v', '--verbose'), {
            'action': 'store_true',
            'help': 'Activate verbose log output',
        }),
    )

    cli = argparse.ArgumentParser()
    for names, settings in option_specs:
        cli.add_argument(*names, **settings)
    parsed = cli.parse_args(args)

    if parsed.verbose:
        log_level = logging.DEBUG
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    return parsed
def md5_chunks(f, part_bytes):
    """Return the raw MD5 digest of each consecutive part of a binary stream.

    The stream is consumed from its current position to EOF and split into
    parts of ``part_bytes`` bytes (the final part may be shorter).  Each part
    is hashed incrementally through a small read buffer, so at most about
    one buffer of data is held in memory regardless of the part size.

    :param f: Binary file-like object whose ``read(n)`` returns ``b''`` at
        EOF.
    :param part_bytes: Part size in bytes.  Zero yields no parts; a negative
        value treats everything up to EOF as one single part, mirroring what
        ``f.read(part_bytes)`` does for such values.
    :return: List of 16-byte MD5 digests, one per part, in stream order.
    """
    read_buffer_bytes = 1024 * 1024  # cap on bytes requested per read() call

    digests = []
    eof = False
    while not eof:
        part_md5 = hashlib.md5()
        part_len = 0
        # A negative part size means "no limit", as with f.read(-1).
        while part_bytes < 0 or part_len < part_bytes:
            want = read_buffer_bytes
            if part_bytes > 0:
                # Never read past the end of the current part.
                want = min(want, part_bytes - part_len)
            block = f.read(want)
            if not block:
                eof = True
                break
            part_md5.update(block)
            part_len += len(block)
        if part_len == 0:
            # Either the stream is exhausted or part_bytes is zero; in both
            # cases there is no further part to record.
            break
        logging.debug('read chunk of length %s', part_len)
        digests.append(part_md5.digest())
    return digests
def main(args):
    """Compute and print the multipart-upload MD5 checksum of a file.

    The file is hashed part by part; the printed checksum is the MD5 of the
    concatenated per-part digests followed by ``-<number of parts>``.  The
    checksum is shown in hexadecimal, or in base64 when ``--base64`` is set.

    :param args: Command-line arguments, excluding the program name.
    :return: 0 on success, 1 if the file could not be opened or read.
    """
    # Imported here so the function carries its own dependencies; these
    # replace the Python 2-only 'hex' / 'base64' str codecs, which do not
    # exist on Python 3 bytes objects.
    import base64
    import binascii

    flags = parse_flags(args)

    try:
        with open(flags.filename, 'rb') as fh:
            hashes = md5_chunks(fh, flags.part_size)
    except IOError as err:
        # Covers failures while reading as well as while opening.
        logging.error('Cannot open or read file "%s": %s', flags.filename, err)
        return 1

    if flags.verbose:
        for seq_num, part_digest in enumerate(hashes, 1):
            logging.debug(
                'hash=%s part_seq_num=%s',
                binascii.hexlify(part_digest).decode('ascii'),
                seq_num,
            )

    joined = b''.join(hashes)
    logging.debug('joined hash = %s', binascii.hexlify(joined).decode('ascii'))

    combined = hashlib.md5(joined)
    multipart_hash = combined.hexdigest()
    logging.debug("final hex digest = %s", multipart_hash)

    if flags.base64:
        # Encode the raw digest bytes directly rather than round-tripping
        # through the hex string.
        shown = base64.b64encode(combined.digest()).decode('ascii')
    else:
        shown = multipart_hash

    print('%s-%d' % (shown, len(hashes)))
    return 0
if __name__ == '__main__':
    # Run the CLI on the arguments after the program name and report
    # main()'s return value to the shell as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.