Skip to content

Instantly share code, notes, and snippets.

@huwcbjones
Last active January 4, 2019 23:01
Show Gist options
  • Save huwcbjones/1c124eaaaa99170bfc34e4e7023df213 to your computer and use it in GitHub Desktop.
Save huwcbjones/1c124eaaaa99170bfc34e4e7023df213 to your computer and use it in GitHub Desktop.
Create or validate an md5sum checksum file for each file in a directory
#!/usr/bin/env python3
import os
import argparse
import re
import subprocess
from typing import Optional, Union
# Command-line interface: a mode, an optional recursion flag and the target directory
parser = argparse.ArgumentParser(
    description="Calculate or validate md5 sums for each file in a given directory",
)
parser.add_argument("mode", choices=["calculate", "validate"])
parser.add_argument(
    "-r",
    dest="recurse",
    action="store_true",  # store_true already defaults the flag to False
    help="Recurse subdirectories",
)
parser.add_argument("directory", help="Directory to sum")
# Parsed at import time; the rest of the script reads its settings from `args`
args = parser.parse_args()

# Name of the md5sum executable that is run for each file
MD5SUM_CMD = "gmd5sum"
def sizeof_fmt(num: int, suffix: str = 'B', binary: bool = False) -> str:
    """
    Format bytes into human readable format

    The value is divided by the unit factor until it is smaller than the
    factor, then printed with one decimal place followed by the unit prefix
    and the suffix (e.g. ``1.5KB`` or ``1.5KiB``).

    :param num: Number of bytes
    :param suffix: suffix (default: "B")
    :param binary: If True, use 1024 instead of 1000 (default: False)
    :return: The formatted string
    """
    factor = 1024.0 if binary else 1000.0
    # "Y" is used when the value is still too large after the last listed unit
    prefix = "Y"
    for unit in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        # Compare the value as it will be printed (one decimal place) so that
        # e.g. 999999 is shown as "1.0M" rather than "1000.0K"
        if abs(round(num, 1)) < factor:
            prefix = unit
            break
        num /= factor
    # The binary marker only makes sense next to a unit prefix ("Ki", "Mi", ...);
    # plain byte counts stay e.g. "512.0B" instead of "512.0iB"
    if binary and prefix:
        prefix += "i"
    return "{:3.1f}{}{}".format(num, prefix, suffix)
def get_file_size(filepath: str, human_readable: bool = True) -> str:
    """
    Get the size of a file as a string
    :param filepath: Path of the file to measure
    :param human_readable: If True, format the size with sizeof_fmt; otherwise
        return the raw byte count followed by "B" (default: True)
    :return: The size string
    """
    byte_count = os.stat(filepath).st_size
    if human_readable:
        return sizeof_fmt(byte_count)
    return "{}B".format(byte_count)
def validate_file(filepath: str, checksumpath: str) -> Optional[bool]:
    """
    Validate a file against its stored md5 checksum file

    Runs ``MD5SUM_CMD -c`` on the checksum file and looks for a
    ``<filepath>: <STATUS>`` entry in its output. The file, its size and the
    status are printed.

    :param filepath: Path of the file to validate
    :param checksumpath: Path of the checksum file belonging to the file
    :return: True if the reported status is "OK", False otherwise, or None if
        the file was skipped (it is a checksum file itself, or the file or
        its checksum file does not exist)
    """
    # Checksum files themselves are never validated
    if filepath.endswith("md5"):
        return None
    if not os.path.exists(filepath) or not os.path.exists(checksumpath):
        # Remove checksum file if file does not exist
        if os.path.exists(checksumpath):
            os.remove(checksumpath)
        return None
    process = subprocess.run(
        [MD5SUM_CMD, "-c", checksumpath],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    out = process.stdout.decode()
    # The path is matched literally: escaping keeps characters such as
    # "(", "+" or "." in file names from being read as regex syntax
    match = re.search("{}: ([A-Z]+)".format(re.escape(filepath)), out)
    # No entry for this path (e.g. the command failed or the checksum file
    # refers to another path) counts as a failed validation, not a crash
    status = match.group(1) if match else "FAILED"
    print("{} ({}): {}".format(filepath, get_file_size(filepath), status))
    return status == "OK"
def calculate_file(filepath: str, checksumpath: str) -> Optional[bool]:
    """
    Calculate the md5 sum of a file and store it in a checksum file

    Runs ``MD5SUM_CMD`` on the file and writes its output to the checksum
    file. The file, its size and the checksum are printed.

    :param filepath: Path of the file to sum
    :param checksumpath: Path of the checksum file to create
    :return: True if a checksum was stored, False if the command output
        contained no checksum, or None if the file was skipped (it is a
        checksum file itself, or its checksum file already exists)
    """
    # Checksum files themselves are never summed
    if filepath.endswith("md5"):
        return None
    # Existing checksums are left untouched
    if os.path.exists(checksumpath):
        return None
    # Capture the output before touching the disk: the checksum file is only
    # created once a sum is available, so a command that fails or cannot be
    # started never leaves an empty checksum file that later runs would skip
    process = subprocess.run([MD5SUM_CMD, filepath], stdout=subprocess.PIPE)
    match = re.search(rb"[a-fA-F\d]{32}", process.stdout)
    if match is None:
        return False
    # Store the command output unmodified
    with open(checksumpath, "wb") as fh:
        fh.write(process.stdout)
    checksum = match.group(0).decode("ascii")
    print("{} ({}): {}".format(filepath, get_file_size(filepath), checksum))
    return True
def walk_dir(dir: str, file_cb, recurse: bool = False) -> tuple:
    """
    Call a callback for each file in a directory and count the results

    For a file ``name`` the callback receives the path of the file and the
    path of its checksum file, ``.name.md5`` in the same directory.

    :param dir: Directory to walk
    :param file_cb: Callable taking (filepath, checksumpath); it returns True
        for success, False for failure, or None if the file is not counted
    :param recurse: If True, also process all subdirectories (default: False)
    :return: Tuple of (number of successes, number of failures)
    """
    success = 0
    failure = 0
    for root, subdirs, files in os.walk(dir):
        if not recurse:
            # os.walk descends on its own; emptying the list in place stops
            # it from visiting anything below the top directory
            subdirs[:] = []
        for f in files:
            filepath = os.path.join(root, f)
            checksumpath = os.path.join(root, ".{}.md5".format(f))
            result = file_cb(filepath, checksumpath)
            if result is None:
                # Skipped files count neither as success nor as failure
                continue
            if result:
                success += 1
            else:
                failure += 1
    return success, failure
# Resolve the target once so the walk starts from an absolute path
directory = os.path.abspath(args.directory)

# Map each command-line mode to the per-file callback that implements it
file_cb = {
    "calculate": calculate_file,
    "validate": validate_file,
}.get(args.mode)

if file_cb is not None:
    success, failures = walk_dir(directory, file_cb, args.recurse)
    # Summary of how many files were handled and how many failed
    print("\nSuccessfully {}d {} files".format(args.mode, success))
    print("Failed to {} {} files".format(args.mode, failures))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment