Skip to content

Instantly share code, notes, and snippets.

@jynik
Last active June 26, 2023 03:04
Show Gist options
  • Save jynik/6ed3e545c7eb384f68ce3a23e03ef388 to your computer and use it in GitHub Desktop.
Save jynik/6ed3e545c7eb384f68ce3a23e03ef388 to your computer and use it in GitHub Desktop.
Compare two files containing (default) md5sum output and print results.
#!/usr/bin/env python3
#
# Copyright (C) 2018 Jon Szymaniak <jon.szymaniak@gmail.com>
# SPDX-License-Identifier: MIT
#
# Compare two files containing (default) md5sum output and print results.
#
################################################################################
# Script version; shown in the usage banner printed by the command-line entry point.
__version__ = '0.1.1'
import os
import re
import sys
class MD5SumResults:
    """
    Hash-to-filename table parsed from a file of md5sum-style output.

    Each recognised line has the form ``<32 hex digits><whitespace><name>``.
    Lines that do not start with a 32-digit hex string are ignored.

    Digests are stored lowercased so that listings written with uppercase hex
    digits compare equal to listings written with lowercase digits.

    The table is keyed by digest: if the same digest appears on several lines
    of one file, only the name from the last such line is kept.
    """

    # Raw string so that `\s` reaches the regex engine as-is instead of being
    # treated as an (invalid) string escape sequence.
    _re = re.compile(r'(?P<hash>[a-fA-F0-9]{32})\s+(?P<file>.*)$')

    def __init__(self, filename):
        """
        Load the md5sum results contained in `filename`.

        `filename` is opened in text mode with the default encoding; any
        error raised by open() propagates to the caller.
        """
        self._hashes = {}
        self.filename = filename

        with open(filename, 'r') as infile:
            for line in infile:
                m = MD5SumResults._re.match(line)
                if m:
                    # Normalise case: hex digests are case-insensitive.
                    self._hashes[m['hash'].lower()] = m['file']

    def match(self, other):
        """
        Returns a list of matching hashes. Each list entry is a tuple in the form:
            (hash, this object's filename, other object's filename)

        Entries appear in the order the hashes were read into this object.
        """
        theirs = other._hashes
        # Dict membership replaces a linear scan of `other` for every hash.
        return [(digest, name, theirs[digest])
                for digest, name in self._hashes.items()
                if digest in theirs]

    def no_match(self, other):
        """
        Returns a list of hashes that occur in this object, but not in `other`.
        Each list entry is in the form:
            (hash, filename)

        Entries appear in the order the hashes were read into this object.
        """
        theirs = other._hashes
        return [(digest, name)
                for digest, name in self._hashes.items()
                if digest not in theirs]
if __name__ == "__main__":
    # Command-line entry point:
    #   <script> <file1> <file2> [same|diff]
    # Loads both md5sum listings and prints the hashes they share and/or the
    # hashes unique to each, depending on the optional third argument.
    argv_len = len(sys.argv)

    # With no third argument, both the matches and the differences are shown.
    show_same = True
    show_diff = True

    # Wrong argument count: print the usage text and exit.
    if argv_len < 3 or argv_len > 4:
        print('md5sum-diff ({:s})'.format(__version__))
        print('Usage: {:s} <file1> <file2> [same|diff]'.format(os.path.basename(sys.argv[0])))
        print('Compare two files containing (default) md5sum output and print results.')
        print('The optional last argument specifies whether the matches or differences are printed.')
        sys.exit(0)

    if argv_len == 4:
        if sys.argv[3] == 'same':
            show_diff = False
        elif sys.argv[3] == 'diff':
            show_same = False
        else:
            # The optional selector is the third argument, sys.argv[3].
            print('Invalid argument: {:s}'.format(sys.argv[3]), file=sys.stderr)
            sys.exit(1)

    a = MD5SumResults(sys.argv[1])
    b = MD5SumResults(sys.argv[2])

    # Only compute the sections that will actually be printed.
    matches = a.match(b) if show_same else []
    a_diffs = a.no_match(b) if show_diff else []
    b_diffs = b.no_match(a) if show_diff else []

    if matches:
        print('\nMatching Hashes')
        print('-' * 32)
        for entry in matches:
            print('{:34s}{:32s}{:32s}'.format(entry[0], entry[1], entry[2]))

    if a_diffs:
        print('\nIn {:s} but not in {:s}'.format(a.filename, b.filename))
        print('-' * 32)
        for entry in a_diffs:
            print('{:34s}{:32s}'.format(entry[0], entry[1]))

    if b_diffs:
        print('\nIn {:s} but not in {:s}'.format(b.filename, a.filename))
        print('-' * 32)
        for entry in b_diffs:
            print('{:34s}{:32s}'.format(entry[0], entry[1]))

    # Trailing blank line after the report.
    print()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment