Skip to content

Instantly share code, notes, and snippets.

@gpiffault
Created January 17, 2022 09:18
Show Gist options
  • Save gpiffault/f8d95d6873054cdf72dab40868b48f5d to your computer and use it in GitHub Desktop.
Save gpiffault/f8d95d6873054cdf72dab40868b48f5d to your computer and use it in GitHub Desktop.
Compare two tar archives: report files whose contents differ (by MD5 digest) and files that are missing from either archive.
#!/usr/bin/env python3
import hashlib
import tarfile
def hash_dict(tar_path):
    """Map each readable member of a tar archive to the MD5 of its contents.

    Members whose name ends in ``.pyc`` are ignored. A member is also left
    out when ``extractfile`` returns ``None`` for it, or when ``extractfile``
    raises (in which case a message is printed and the scan continues).

    Args:
        tar_path: Path of the archive; handed straight to ``tarfile.open``.

    Returns:
        A dict of ``{member name: hex MD5 digest}``.
    """
    result = {}
    # The context manager closes the archive even if reading a member raises.
    with tarfile.open(tar_path) as tar:
        for member in tar:
            if member.name.endswith(".pyc"):
                continue
            try:
                f = tar.extractfile(member)
            except Exception:
                # Deliberately best-effort: one unreadable member must not
                # abort the comparison. Unlike a bare ``except``, this lets
                # KeyboardInterrupt and SystemExit propagate.
                print("Error reading", tar_path, member.name)
                continue
            if f is None:
                continue
            h = hashlib.md5()
            # Hash in fixed-size chunks so large members are never loaded
            # into memory whole; ``with`` releases the member's file object.
            with f:
                while chunk := f.read(102400):
                    h.update(chunk)
            result[member.name] = h.hexdigest()
    return result
def diff(path1, path2):
    """Print a report of the differences between two tar archives.

    Both archives are summarised with ``hash_dict``. Up to three sections
    are printed, each only when it is non-empty: names present in both
    archives whose digests differ, names present only in the second archive,
    and names present only in the first.

    Args:
        path1: Path of the first archive ("tar 1" in the report).
        path2: Path of the second archive ("tar 2" in the report).
    """
    hd1 = hash_dict(path1)
    hd2 = hash_dict(path2)
    # Every group is sorted: iteration order of a set of strings changes from
    # run to run (hash randomization), which would make reports unstable.
    content_mismatch = sorted(
        m for m in hd1.keys() & hd2.keys() if hd1[m] != hd2[m]
    )
    if content_mismatch:
        print("# File content mismatch")
        print(*content_mismatch, sep="\n")
    missing1 = sorted(hd2.keys() - hd1.keys())
    if missing1:
        print("# Missing files in tar 1")
        print(*missing1, sep="\n")
    missing2 = sorted(hd1.keys() - hd2.keys())
    if missing2:
        print("# Missing files in tar 2")
        print(*missing2, sep="\n")
if __name__ == "__main__":
    # Script entry point: the two archive paths are required positional
    # arguments, compared in the order given.
    import argparse

    cli = argparse.ArgumentParser()
    for positional in ("tar1", "tar2"):
        cli.add_argument(positional)
    options = cli.parse_args()
    diff(options.tar1, options.tar2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment