Skip to content

Instantly share code, notes, and snippets.

@Thracky
Created May 31, 2019 14:13
Show Gist options
  • Save Thracky/7f4a9069e998dc1fbedda51e6b210ab7 to your computer and use it in GitHub Desktop.
Save Thracky/7f4a9069e998dc1fbedda51e6b210ab7 to your computer and use it in GitHub Desktop.
Simple Directory diff'er
import os, hashlib, argparse, sys
def hash_dir(directory):
    """Map every file under *directory* to the SHA-1 digest of its contents.

    The tree is traversed with ``os.walk``. Entries that are symbolic links
    are skipped. A file that cannot be opened or read is reported on stdout
    and left out of the result instead of aborting the whole run.

    Args:
        directory: Root of the directory tree to hash.

    Returns:
        A dict whose keys are file paths relative to *directory* (no leading
        separator) and whose values are hexadecimal SHA-1 digests.
    """
    dirhashes = {}
    for subdir, _dirs, files in os.walk(directory):
        for afile in files:
            filepath = os.path.join(subdir, afile)
            if os.path.islink(filepath):
                continue
            sha1 = hashlib.sha1()
            try:
                with open(filepath, "rb") as currfile:
                    # Read in 64 KiB chunks so large files are never held
                    # in memory all at once.
                    for filedata in iter(lambda: currfile.read(65536), b""):
                        sha1.update(filedata)
            except IOError:
                print("Error opening " + filepath + ". Ignoring.")
                continue
            # relpath yields the same key whether or not the caller wrote a
            # trailing separator on the root, so the results for two trees
            # can be compared key-by-key. Splitting the path on the root
            # string left a leading separator in one case and not the other.
            dirhashes[os.path.relpath(filepath, directory)] = sha1.hexdigest()
    return dirhashes
def main(argv):
    """Compare two directory trees by content hash and write a report.

    Both trees are hashed with ``hash_dir``. Progress and every finding are
    printed to stdout, and a three-section report (changed files, files only
    in the first tree, files only in the second tree) is written to the
    output file. Each section is sorted so repeated runs produce identical
    reports.

    Args:
        argv: Command-line arguments without the program name: the two
            directories to compare, optionally followed by ``-o/--output``
            naming the report file (``results.txt`` when omitted).
    """
    argparser = argparse.ArgumentParser(
        description="Directory diff-er for finding changed files between two directory trees",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument('dir1', action='store', type=str, metavar="directory1")
    argparser.add_argument('dir2', action='store', type=str, metavar="directory2")
    argparser.add_argument('-o', '--output', action='store', type=str,
                           default="results.txt",
                           help="file the report is written to")
    args = argparser.parse_args(argv)

    dir1hashes = hash_dir(args.dir1)
    print("All hashes calculated for " + args.dir1)
    dir2hashes = hash_dir(args.dir2)
    print("All hashes calculated for " + args.dir2)

    diffhashes = []
    dir1only = []
    # Iterating in sorted order keeps both the console output and the report
    # sections deterministic.
    for filename in sorted(dir1hashes):
        if filename not in dir2hashes:
            print(filename + " was not present in " + args.dir2)
            dir1only.append(filename)
        elif dir1hashes[filename] != dir2hashes[filename]:
            print(filename + " has a different hash.")
            diffhashes.append(filename)
    dir2only = sorted(name for name in dir2hashes if name not in dir1hashes)

    # Text mode: the report is built from str values, which cannot be
    # written to a binary-mode file on Python 3.
    with open(args.output, "w") as resultsfile:
        resultsfile.write("Files with different hashes:\n")
        for entry in diffhashes:
            resultsfile.write(entry + "\n")
        resultsfile.write("\n\nFiles only in " + args.dir1 + "\n")
        for entry in dir1only:
            resultsfile.write(entry + "\n")
        resultsfile.write("\n\nFiles only in " + args.dir2 + "\n")
        for entry in dir2only:
            resultsfile.write(entry + "\n")
    print("All done.")
# Script entry point: print the banner, then run the comparison on the
# arguments that follow the program name.
if __name__ == "__main__":
    print("Directory Differ")
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment