Last active
January 4, 2019 23:01
-
-
Save huwcbjones/1c124eaaaa99170bfc34e4e7023df213 to your computer and use it in GitHub Desktop.
Calculate or validate an MD5 checksum for each file in a directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import argparse | |
import re | |
import subprocess | |
from typing import Optional, Union | |
# Command-line interface: a mode, an optional recursion flag and the target directory.
parser = argparse.ArgumentParser(
    description="Calculate or validate md5 sums for each file in a given directory",
)
parser.add_argument("mode", choices=["calculate", "validate"])
parser.add_argument(
    "-r",
    dest="recurse",
    action="store_true",
    default=False,
    help="Recurse subdirectories",
)
parser.add_argument("directory", help="Directory to sum")
args = parser.parse_args()

# Name of the external checksum executable run through subprocess.
MD5SUM_CMD = "gmd5sum"
def sizeof_fmt(num: int, suffix: str = 'B', binary: bool = False) -> str:
    """
    Format bytes into human readable format

    :param num: Number of bytes
    :param suffix: suffix (default: "B")
    :param binary: If True, use 1024 instead of 1000 (default: False)
    :return: The formatted string, e.g. "1.5KB" or "1.5KiB"
    """
    factor = 1024.0 if binary else 1000.0
    for unit in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        if abs(num) < factor:
            break
        num /= factor
    else:
        # Larger than every listed prefix: report in yotta.
        unit = 'Y'
    # The binary marker only makes sense next to a prefix ("KiB"); a plain
    # byte count must stay "512.0B" rather than becoming "512.0iB".
    marker = "i" if binary and unit else ""
    return "{:3.1f}{}{}{}".format(num, unit, marker, suffix)
def get_file_size(filepath: str, human_readable: bool = True) -> str:
    """
    Return the size of a file as a string

    :param filepath: Path of the file to measure
    :param human_readable: If True, format through sizeof_fmt; otherwise
        return the raw byte count followed by "B" (default: True)
    :return: The size string
    """
    byte_count = os.path.getsize(filepath)
    if human_readable:
        return sizeof_fmt(byte_count)
    return "{}B".format(byte_count)
def validate_file(filepath: str, checksumpath: str) -> Optional[bool]:
    """
    Check a file against its stored checksum file using MD5SUM_CMD -c

    :param filepath: Path of the file to validate
    :param checksumpath: Path of the checksum file for filepath
    :return: True if the tool reported "OK", False if it reported anything
        else (or nothing recognisable), None if the file was skipped
    """
    # Paths ending in "md5" are treated as checksum files and never validated.
    if filepath.endswith("md5"):
        return None

    if not os.path.exists(filepath) or not os.path.exists(checksumpath):
        # Remove checksum file if file does not exist
        if os.path.exists(checksumpath):
            os.remove(checksumpath)
        return None

    result = subprocess.run(
        [MD5SUM_CMD, "-c", checksumpath],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    output = result.stdout.decode(errors="replace")

    # The path is literal text, not a pattern: escape it so characters such
    # as "(", "+" or "[" in file names cannot break or alter the regex.
    match = re.search("{}: ([A-Z]+)".format(re.escape(filepath)), output)
    # No status line for this file (e.g. a malformed checksum file) counts as
    # a failure instead of raising IndexError.
    status = match.group(1) if match else "FAILED"

    print("{} ({}): {}".format(filepath, get_file_size(filepath), status))
    return status == "OK"
def calculate_file(filepath: str, checksumpath: str) -> Optional[bool]:
    """
    Write the MD5SUM_CMD output for a file into its checksum file

    :param filepath: Path of the file to sum
    :param checksumpath: Path the checksum file is written to
    :return: True if a 32 hex digit checksum was written, False if none was
        found in the output (the checksum file is then removed), None if the
        file was skipped
    """
    # Skip checksum files themselves and files that already have a checksum.
    if filepath.endswith("md5") or os.path.exists(checksumpath):
        return None

    # The tool writes straight into the checksum file.
    with open(checksumpath, "w") as sink:
        subprocess.call([MD5SUM_CMD, filepath], stdout=sink)

    # Read the result back to confirm a digest was actually produced.
    with open(checksumpath, "r") as source:
        written = source.read()

    found = re.search(r"([a-fA-F\d]{32})", written)
    if found is None:
        os.remove(checksumpath)
        return False

    print("{} ({}): {}".format(filepath, get_file_size(filepath), found.group(1)))
    return True
def walk_dir(dir, file_cb, recurse=False):
    """
    Run a callback on every file in a directory and tally the results

    :param dir: Directory to process
    :param file_cb: Callable taking (filepath, checksumpath); returns True for
        success, False for failure, or None when the file was skipped
    :param recurse: If True, also process all subdirectories (default: False)
    :return: Tuple of (success count, failure count)
    """
    success = 0
    failure = 0
    for root, subdirs, files in os.walk(dir):
        if not recurse:
            # os.walk descends into every subdirectory on its own; emptying
            # the list in place stops it from going below the top level.
            subdirs[:] = []
        for f in files:
            filepath = os.path.join(root, f)
            # The checksum lives next to the file as a hidden ".<name>.md5".
            checksumpath = os.path.join(root, ".{}.md5".format(f))
            result = file_cb(filepath, checksumpath)
            if result is None:
                continue
            if result:
                success += 1
            else:
                failure += 1
    return success, failure
# Script driver: resolve the target directory, pick the per-file action for the
# requested mode, process the files and print a summary.
directory = os.path.abspath(args.directory)
file_cb = {
    "calculate": calculate_file,
    "validate": validate_file,
}.get(args.mode)

if file_cb is not None:
    success, failures = walk_dir(directory, file_cb, args.recurse)
    print("\nSuccessfully {}d {} files".format(args.mode, success))
    print("Failed to {} {} files".format(args.mode, failures))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment