Created
May 31, 2019 14:13
-
-
Save Thracky/7f4a9069e998dc1fbedda51e6b210ab7 to your computer and use it in GitHub Desktop.
Simple Directory diff'er
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import hashlib
import os
import sys
def hash_dir(directory):
    """Map every regular file under *directory* to the SHA-1 of its contents.

    The tree is traversed with ``os.walk``.  Paths that are symbolic links
    are skipped.  A file that cannot be opened or read is reported on stdout
    and left out of the result instead of aborting the whole run.

    Args:
        directory: Root of the directory tree to hash.

    Returns:
        A dict whose keys are file paths relative to *directory*, prefixed
        with ``os.sep`` (for example ``/sub/name.txt`` on POSIX), and whose
        values are the hexadecimal SHA-1 digests of the file contents.
    """
    dirhashes = {}
    for subdir, _dirs, files in os.walk(directory):
        for afile in files:
            filepath = os.path.join(subdir, afile)
            if os.path.islink(filepath):
                continue
            # Derive the key with relpath rather than string-splitting on
            # ``directory``: the result is then the same whether or not the
            # caller passed a trailing separator, so two trees can be compared
            # key-for-key.  The leading separator keeps the key shape that a
            # root given without a trailing separator has always produced.
            filepath_noparent = os.sep + os.path.relpath(filepath, directory)
            sha1 = hashlib.sha1()
            try:
                with open(filepath, "rb") as currfile:
                    # Read in 64 KiB chunks so large files are never held in
                    # memory all at once.
                    for filedata in iter(lambda: currfile.read(65536), b""):
                        sha1.update(filedata)
            except IOError:
                # Best effort: an unreadable file is reported and skipped.
                print("Error opening " + filepath + ". Ignoring.")
                continue
            dirhashes[filepath_noparent] = sha1.hexdigest()
    return dirhashes
def main(argv):
    """Compare two directory trees by content hash and write a report.

    Both trees are hashed with ``hash_dir``.  Files present in both trees
    with different hashes, and files present only in the first tree, are
    announced on stdout.  A summary with three sections (different hashes,
    only in the first directory, only in the second directory) is written to
    ``results.txt`` in the current working directory; every section is
    sorted by path.

    Args:
        argv: Command-line arguments without the program name; must contain
            the two directory paths to compare.
    """
    argparser = argparse.ArgumentParser(
        description="Directory diff-er for finding changed files between two directory trees",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    argparser.add_argument('dir1', action='store', type=str, metavar="directory1")
    argparser.add_argument('dir2', action='store', type=str, metavar="directory2")
    args = argparser.parse_args(argv)

    dir1hashes = hash_dir(args.dir1)
    print("All hashes calculated for " + args.dir1)
    dir2hashes = hash_dir(args.dir2)
    print("All hashes calculated for " + args.dir2)

    diffhashes = []
    dir1only = []
    # Walk the first tree's paths in sorted order so both the console output
    # and the collected lists are deterministic.
    for filename in sorted(dir1hashes):
        if filename in dir2hashes:
            if dir1hashes[filename] != dir2hashes[filename]:
                print(filename + " has a different hash.")
                diffhashes.append(filename)
            # Drop every path seen in both trees so that dir2hashes ends up
            # holding only the files that exist solely in the second tree.
            del dir2hashes[filename]
        else:
            print(filename + " was not present in " + args.dir2)
            dir1only.append(filename)

    # Text mode: the report consists of str lines, which a binary-mode file
    # rejects on Python 3.
    with open("results.txt", "w") as resultsfile:
        resultsfile.write("Files with different hashes:\n")
        for entry in diffhashes:
            resultsfile.write(entry + "\n")
        resultsfile.write("\n\nFiles only in " + args.dir1 + "\n")
        for entry in dir1only:
            resultsfile.write(entry + "\n")
        resultsfile.write("\n\nFiles only in " + args.dir2 + "\n")
        for entry in sorted(dir2hashes):
            resultsfile.write(entry + "\n")
    print("All done.")
# Run the comparison only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    print("Directory Differ")
    # main() expects the arguments without the program name.
    cli_args = sys.argv[1:]
    main(cli_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment