Skip to content

Instantly share code, notes, and snippets.

@anshultiwari1
Last active February 20, 2024 10:05
Show Gist options
  • Save anshultiwari1/33020c63ad9d08a95a1e to your computer and use it in GitHub Desktop.
Save anshultiwari1/33020c63ad9d08a95a1e to your computer and use it in GitHub Desktop.
Compares two directories and lists all differences based on contents and size ...
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os, sys
import filecmp
# Module-level accumulator of (absolute_path, size_in_bytes) tuples.
# compare_dirs() appends to it and main() reads it, so it must stay global.
holderlist = []


def _entry_size(path):
    """Return the size in bytes of *path*.

    For a regular file this is ``os.path.getsize(path)``.  For a directory it
    is the sum of the sizes of every file below it, because ``getsize`` on a
    directory only reports the size of the directory entry itself, which
    would understate how much data is missing from the destination.
    """
    if not os.path.isdir(path):
        return os.path.getsize(path)
    total = 0
    for root, _subdirs, files in os.walk(path):
        for name in files:
            try:
                total += os.path.getsize(os.path.join(root, name))
            except OSError:
                # Unreadable entry or dangling symlink: skip it rather than
                # abort the whole comparison.
                continue
    return total


def compare_dirs(dir1, dir2):
    """Recursively collect entries of *dir1* that are missing from or differ in *dir2*.

    Each finding is appended to the module-level ``holderlist`` as a tuple
    ``(absolute_path_in_dir1, size_in_bytes)``:

    * entries that exist only in *dir1* (files or whole directories), and
    * files present in both directories that ``filecmp.dircmp`` reports as
      different (per the filecmp documentation, dircmp relies on the same
      shallow comparison as ``filecmp.cmp``).

    Sub-directories common to both sides are compared recursively.

    Args:
        dir1: Path of the source directory.
        dir2: Path of the destination directory.

    Returns:
        The shared ``holderlist`` list.  It is never cleared here, so results
        accumulate across calls.
    """
    dircomp = filecmp.dircmp(dir1, dir2)
    # Source-only entries first, then differing files, as in the original order.
    for name in dircomp.left_only + dircomp.diff_files:
        path = os.path.abspath(os.path.join(dir1, name))
        holderlist.append((path, _entry_size(path)))
    for item in dircomp.common_dirs:
        compare_dirs(os.path.abspath(os.path.join(dir1, item)),
                     os.path.abspath(os.path.join(dir2, item)))
    return holderlist
def size_format(size):
    """Format a byte count as a human-readable string such as ``'1.500000 KB'``.

    The value is scaled by 1024 until it drops below 1024 or the largest
    unit (TB) is reached; anything of 1024 TB or more is still expressed in
    TB.

    Args:
        size: Number of bytes (int or float).

    Returns:
        A string of the form ``'<value as %f> <unit>'`` where unit is one of
        B, KB, MB, GB, TB.
    """
    units = ['B', 'KB', 'MB', 'GB', 'TB']
    # Force float arithmetic so the division never truncates (Python 2 ints).
    size = float(size)
    # Only divide when stepping up to the next unit; the last unit is the
    # fallback, so the value must not be divided once more after reaching it.
    for unit in units[:-1]:
        if size < 1024:
            return '%f %s' % (size, unit)
        size /= 1024
    return '%f %s' % (size, units[-1])
# Directory the CSV report is written to unless the caller overrides it.
_DEFAULT_LOG_DIR = '/nas/projects/Tactic/bilal/render/temp/logs'


def main(args, log_dir=_DEFAULT_LOG_DIR):
    """Compare two folders and write a CSV report of the differences.

    Args:
        args: Sequence of exactly two paths: ``[source, destination]``.
        log_dir: Directory that receives the report file.  Defaults to the
            location the script has always used; the directory must already
            exist.

    Returns:
        None.  Usage/validation problems are reported on stdout and the
        function returns early without comparing anything.

    The report is named ``<today>_<source name>_<destination name>_diff_list.csv``
    and lists every differing path with its formatted size, smallest first,
    followed by the total size.
    """
    import datetime
    import operator

    if len(args) != 2:
        print("Usage: <script> <source> <destination>\n")
        print("< the script compares two folders >\n")
        return
    dir1, dir2 = args
    if not os.path.isdir(dir1):
        print("the source folder is not there\n")
        print("< the script compares two folders >\n")
        return
    if not os.path.isdir(dir2):
        print("the destination folder is not there\n")
        print("< the script compares two folders >\n")
        return

    print("<< comparision process start >>\n")
    # compare_dirs() appends to the module-level holderlist; drop leftovers
    # from any earlier run so a second main() call does not report duplicates.
    del holderlist[:]
    compare_dirs(dir1, dir2)

    report_lines = [
        "The source location is:" + dir1 + "\n",
        "The destination location is:" + dir2 + "\n\n",
    ]
    if not holderlist:
        print("there are no difference between the two folders.\n")
    else:
        print("Success. all differences have been found between the two folders.\n")
        holderlist.sort(key=operator.itemgetter(1))
        total_size = 0
        for path, size in holderlist:
            total_size += size
            report_lines.append(path + "," + size_format(size) + "\n")
        report_lines.append(
            "\nThe total size of the difference is:," + size_format(total_size) + "\n")

    # NOTE(review): indentation was lost in the original listing; the report
    # is assumed to be written whether or not differences were found.
    # normpath() strips a trailing slash so "some/dir/" still yields "dir"
    # instead of an empty name component.
    report_name = '%s_%s_%s_diff_list.csv' % (
        datetime.date.today(),
        os.path.basename(os.path.normpath(dir1)),
        os.path.basename(os.path.normpath(dir2)),
    )
    diff_file = os.path.join(log_dir, report_name)
    # Kept from the original: remove any previous report before recreating it.
    if os.path.isfile(diff_file):
        os.remove(diff_file)
    with open(diff_file, 'w') as report:
        report.write("".join(report_lines))
    print("<< process is complete.>>\n")
    print("<< for details, please check the file: %s>>\n" % diff_file)
# Script entry point: forward the command-line arguments (without the
# program name) to main() only when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment