Skip to content

Instantly share code, notes, and snippets.

@jeffgerhard
Last active March 30, 2017 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jeffgerhard/a93d8f36761459752cbf7727053e2fd0 to your computer and use it in GitHub Desktop.
Save jeffgerhard/a93d8f36761459752cbf7727053e2fd0 to your computer and use it in GitHub Desktop.
Compare two directories and optionally replace one's contents with the other's (e.g., restore from a backup)
'''
Compare two directories recursively and, optionally, replace the
contents of one with files from the other.
Use case: restoring files from an old backup onto a cloned hard
drive that has unpredictably corrupt files.
'''
import os
import hashlib
from shutil import copy2
def md5(fname, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and hashed piece by piece, so the
    whole file never has to be held in memory at once.

    Args:
        fname: path of the file to hash.
        chunk_size: number of bytes read per iteration. It only affects
            I/O granularity, never the resulting digest.

    Returns:
        The digest as a string of 32 lowercase hexadecimal characters.

    Raises:
        ValueError: if ``chunk_size`` is smaller than 1.
        OSError: if the file cannot be opened or read.
    """
    # A read size of 0 would end the loop immediately and silently return
    # the digest of empty input, so reject it up front.
    if chunk_size < 1:
        raise ValueError('chunk_size must be a positive integer')
    hash_md5 = hashlib.md5()
    with open(fname, 'rb') as f:
        # iter() with a sentinel keeps calling f.read until it returns b""
        # (end of file).
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
def comparedirs(goodpath, suspiciouspath, replace=None):
    """Compare the files under ``goodpath`` with their twins under
    ``suspiciouspath``.

    The tree below ``goodpath`` is walked recursively. For each file, the
    path at the same relative location below ``suspiciouspath`` is checked:
    if nothing exists there the file is recorded as missing; otherwise the
    MD5 digests of both files are compared and the file is recorded as ok
    (equal) or corrupt (different). A progress summary is printed for
    every directory visited.

    Args:
        goodpath: root directory holding the trusted files.
        suspiciouspath: root directory holding the files to verify.
        replace: optional replacement mode. 'missing' copies missing files
            from ``goodpath``, 'corrupt' overwrites files whose digest
            differs, 'both' does both, and 'confirm' asks for each file
            through ``confirmreplace`` (whose answer may switch the mode
            for the remainder of the run). Any other value, including the
            default None, only reports.

    Returns:
        A tuple ``(okfiles, missingfiles, corruptfiles)``; each is a list
        of paths below ``suspiciouspath``.
    """
    # POSSIBLY TO DO -- CONSIDER ASSESSING 'EXTRA' FILES
    okfiles = []
    missingfiles = []
    corruptfiles = []
    for root, dirs, files in os.walk(goodpath):
        print('-' * 45)
        print(root)
        print(str(len(files)), 'files, ', str(len(dirs)),
              'directories')
        # Derive the mirrored directory from the path of root relative to
        # goodpath. Counting os.sep-separated components instead breaks as
        # soon as goodpath carries a trailing (or alternate) separator.
        relative = os.path.relpath(root, goodpath)
        if relative == os.curdir:
            spath = suspiciouspath
        else:
            spath = os.path.join(suspiciouspath, relative)
        for f in files:
            safefile = os.path.join(root, f)
            suspfile = os.path.join(spath, f)
            if os.path.exists(suspfile):
                if md5(safefile) == md5(suspfile):
                    okfiles.append(suspfile)
                    continue
                corruptfiles.append(suspfile)
                if replace in ('corrupt', 'both', 'confirm'):
                    # confirmreplace may hand back a new mode when the user
                    # chooses to stop being asked.
                    replace, do_copy = confirmreplace(replace, f,
                                                      'is corrupt')
                    if do_copy:
                        copy2(safefile, suspfile)
            else:
                missingfiles.append(suspfile)
                if replace in ('missing', 'both', 'confirm'):
                    replace, do_copy = confirmreplace(replace, f,
                                                      'is missing')
                    if do_copy:
                        # The whole directory may be absent on the
                        # suspicious side, so create it before copying.
                        os.makedirs(spath, exist_ok=True)
                        copy2(safefile, suspfile)
    return okfiles, missingfiles, corruptfiles
def confirmreplace(replace, f, reason):
    """Decide whether the file ``f`` should be replaced.

    Args:
        replace: the current replacement mode. Anything other than
            'confirm' approves the replacement without asking.
        f: name of the file, used only in the printed messages.
        reason: short text shown after the file name in the prompt
            (the caller passes e.g. 'is corrupt' or 'is missing').

    Returns:
        A tuple ``(mode, proceed)``. ``mode`` is the replacement mode to
        use from now on: unchanged, unless the user typed one of 'both',
        'corrupt' or 'missing' to stop the prompts. ``proceed`` is True
        when the file should be replaced.
    """
    if replace != 'confirm':
        print('replacing', f)
        return replace, True
    print('[enter "both", "corrupt", or "missing" to halt confirmations]')
    print(f, reason, end=' -- ')
    # Strip surrounding whitespace so an answer such as "y " is not
    # silently treated as a refusal.
    response = input('replace it? ').strip().lower()
    if response in ('y', 'yes'):
        print('ok, replacing...')
        return replace, True
    if response in ('both', 'corrupt', 'missing'):
        # NOTE: choosing a mode also approves the current file, whatever
        # the reason it was flagged for.
        return response, True
    return replace, False
# Script entry point. The guard keeps the interactive comparison from
# starting when this file is imported for its functions.
if __name__ == '__main__':
    # Placeholder locations: edit both before running.
    goodpath = r'C:\path\with\confirmed\files'
    suspiciouspath = r'C:\location\of\dubious\files'
    # 'confirm' asks before each replacement.
    ok, missing, corrupt = comparedirs(goodpath, suspiciouspath, replace='confirm')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment