Skip to content

Instantly share code, notes, and snippets.

@abg
Last active November 22, 2019 20:49
Show Gist options
  • Save abg/c28c6cfa09ae4cae41d73fc317310e66 to your computer and use it in GitHub Desktop.
Save abg/c28c6cfa09ae4cae41d73fc317310e66 to your computer and use it in GitHub Desktop.
Compute checksums with external program vs. python stdlib, with python3 compatibility
from __future__ import print_function
import argparse
import codecs
import hashlib
import os
import subprocess
import sys
import time
try:
import xxhash
except ImportError:
xxhash = None
def compute_checksum(algorithm, basedir, filename, output):
    """Hash one file in-process and append its checksum line to ``output``.

    The line written is ``<hexdigest>  <relative path>``: the digest, two
    spaces, then ``filename`` expressed relative to ``basedir``. Two spaces
    is the conventional ``*sum`` layout (digest, space, mode character,
    name), so the result matches what the external tools used by
    ``run_checksum`` emit and stays unambiguous for names that begin with a
    space or ``*``.

    Args:
        algorithm: ``'xxhash'`` to use ``xxhash.xxh64``; any other value is
            passed to ``hashlib.new``.
        basedir: Directory the recorded path is made relative to.
        filename: Path of the file to hash; it is opened in binary mode.
        output: Writable text stream that receives the checksum line.

    Raises:
        RuntimeError: ``algorithm`` is ``'xxhash'`` but the optional
            ``xxhash`` module could not be imported.
        ValueError: ``hashlib.new`` does not recognise ``algorithm``.
    """
    if algorithm == 'xxhash':
        if xxhash is None:
            # RuntimeError is still an Exception, so existing handlers keep working.
            raise RuntimeError("xxhash not available on this platform. Try 'pip install xxhash'")
        hasher = xxhash.xxh64()
    else:
        hasher = hashlib.new(algorithm)

    # Feed the file in fixed-size chunks so large files are never held in
    # memory all at once; read() returning b"" marks end of file.
    with open(filename, "rb") as stream:
        for chunk in iter(lambda: stream.read(16384), b""):
            hasher.update(chunk)

    rpath = os.path.relpath(filename, basedir)
    print("%s  %s" % (hasher.hexdigest(), rpath), file=output)
def run_checksum(algorithm, basedir, filename, output):
    """Checksum one file by running the external ``<algorithm>sum`` program.

    The program is started with ``basedir`` as its working directory and is
    given ``filename`` as a path relative to ``basedir``; its standard
    output is redirected straight into ``output``.

    Args:
        algorithm: Name prefix of the tool, e.g. ``'sha256'`` runs
            ``sha256sum``. ``'xxhash'`` is mapped to ``xxhsum``.
        basedir: Working directory for the child process and the base the
            file path is made relative to.
        filename: Path of the file to checksum.
        output: Target for the child's stdout; passed to ``subprocess`` as
            the ``stdout`` argument.

    Raises:
        subprocess.CalledProcessError: The program exited with a non-zero
            status.
        OSError: The program could not be started.
    """
    if algorithm == 'xxhash':
        algorithm = 'xxh'
    cmd = algorithm + 'sum'

    rpath = os.path.relpath(filename, basedir)
    if rpath.startswith('-'):
        # A leading '-' would be parsed by the tool as an option; anchoring
        # the path at the current directory names the same file safely.
        rpath = os.path.join(os.curdir, rpath)

    # The child writes directly to the underlying file descriptor, so any
    # text still sitting in our own buffer must reach the file first or the
    # two outputs would end up out of order.
    flush = getattr(output, 'flush', None)
    if flush is not None:
        flush()

    # check_call waits for the child and raises CalledProcessError on a
    # non-zero exit status.
    subprocess.check_call([cmd, rpath], stdout=output, close_fds=False, cwd=basedir)
def main():
    """Write a ``CHECKSUMS`` file covering every file under the current directory.

    Command-line options:
        --pure-python    Hash files inside this process via
                         ``compute_checksum`` instead of spawning an
                         external program via ``run_checksum``.
        -a, --algorithm  Checksum algorithm name (default ``xxhash``).

    The tree rooted at the current working directory is walked in sorted
    order so the resulting file is reproducible between runs. The
    ``CHECKSUMS`` file itself is skipped. Total wall-clock time is reported
    on stderr.

    Returns:
        None, so the ``sys.exit(main())`` entry point exits with status 0
        on success.
    """
    parser = argparse.ArgumentParser(description='Compute checksums for all files in CWD')
    parser.add_argument('--pure-python', action='store_true',
                        help='Hash files in-process (hashlib/xxhash) instead of '
                             'running the external <algorithm>sum program')
    parser.add_argument('-a', '--algorithm', default='xxhash',
                        help='Checksum algorithm to use (default: %(default)s)')
    args = parser.parse_args()

    # TODO: Replace with the actual basedir of the backup directory
    basedir = os.getcwd()
    checksums_path = os.path.join(basedir, 'CHECKSUMS')

    # Both implementations share one signature, so pick once up front.
    checksum = compute_checksum if args.pure_python else run_checksum

    t0 = time.time()
    with codecs.open(checksums_path, 'w', encoding='utf8') as output:
        for root, dirs, files in os.walk(basedir):
            # Sorting in place steers os.walk's descent order; together with
            # the sorted file list this makes the output deterministic.
            dirs.sort()
            for fname in sorted(files):
                cpath = os.path.join(root, fname)
                if cpath == checksums_path:
                    # Never checksum the file that is currently being written.
                    continue
                checksum(args.algorithm, basedir, cpath, output)
    print("Total Elapsed time: %r" % (time.time() - t0,), file=sys.stderr)
# Script entry point: run main() only when executed directly, and use its
# return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment