# compare two directories and optionally replace one's content (e.g., from a backup)
'''
compare two directories recursively and optionally
replace one directory's contents with the other's
use case: replacing files from an old backup for
a cloned hard drive that has unpredictably corrupt files
'''
import hashlib
import os
from shutil import copy2
def md5(fname, chunk_size=65536):
    ''' returns the hexadecimal md5 digest of the file at fname.
    the file is read in binary mode, chunk_size bytes at a time, so
    the whole file is never held in memory at once. the digest does
    not depend on chunk_size; raises ValueError if chunk_size is not
    positive '''
    # read(0) returns b"" immediately, which would end the loop before any
    # data is hashed and silently yield the digest of an empty file
    if chunk_size <= 0:
        raise ValueError('chunk_size must be a positive number of bytes')
    hash_md5 = hashlib.md5()
    with open(fname, 'rb') as f:
        # iter() with a sentinel keeps calling read() until it returns b""
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
def comparedirs(goodpath, suspiciouspath, replace=None):
    ''' compares two directories' contents, returning three lists
    (ok, missing, corrupt) of file paths under suspiciouspath. every
    file found while walking goodpath is looked up at the same relative
    location under suspiciouspath: absent files are 'missing', files
    whose md5 differs are 'corrupt' (or changed), the rest are ok.
    if replace is set, will replace some files from the goodpath.
    replace options: 'missing', 'corrupt', 'both', or 'confirm'
    ('confirm' asks before each replacement) '''
    # POSSIBLY TO DO -- CONSIDER ASSESSING 'EXTRA' FILES
    okfiles = []
    missingfiles = []
    corruptfiles = []
    corruptmodes = ('corrupt', 'both', 'confirm')
    missingmodes = ('missing', 'both', 'confirm')
    for (root, dirs, files) in os.walk(goodpath):
        print('-' * 45)
        print(root)
        print(str(len(files)), 'files, ', str(len(dirs)),
              'directories')
        # locate the matching directory with relpath rather than by counting
        # path components: counting breaks when goodpath ends with a
        # separator (the first sub-directory level gets dropped)
        relroot = os.path.relpath(root, goodpath)
        if relroot == os.curdir:
            spath = suspiciouspath
        else:
            spath = os.path.join(suspiciouspath, relroot)
        for f in files:
            safefile = os.path.join(root, f)
            suspfile = os.path.join(spath, f)
            if os.path.exists(suspfile):
                if md5(safefile) == md5(suspfile):
                    okfiles.append(suspfile)
                    continue
                corruptfiles.append(suspfile)
                if replace in corruptmodes:
                    replace, docopy = confirmreplace(replace, f, 'is corrupt')
                    # the answer may have switched the mode; re-check it so
                    # that answering 'missing' here does not overwrite this
                    # corrupt file
                    if docopy and replace in corruptmodes:
                        copy2(safefile, suspfile)
            else:
                missingfiles.append(suspfile)
                if replace in missingmodes:
                    replace, docopy = confirmreplace(replace, f, 'is missing')
                    # same re-check: answering 'corrupt' must not restore
                    # this missing file
                    if docopy and replace in missingmodes:
                        # the containing directory may be missing as well
                        os.makedirs(spath, exist_ok=True)
                        copy2(safefile, suspfile)
    return okfiles, missingfiles, corruptfiles
def confirmreplace(replace, f, reason):
    ''' decides whether file f should be replaced, returning a tuple
    (replace mode, True/False). for any mode other than 'confirm' the
    answer is always True and the mode is returned unchanged. in
    'confirm' mode the user is asked: 'y' or 'yes' approves this file,
    'both', 'corrupt', or 'missing' approves it and becomes the new
    mode (halting further confirmations), anything else declines.
    reason is only used in the prompt text '''
    if replace != 'confirm':
        print('replacing', f)
        return replace, True
    print('[enter "both", "corrupt", or "missing" to halt confirmations]')
    print(f, reason, end=' -- ')
    # strip surrounding whitespace so ' y' or 'both ' is not read as a refusal
    response = input('replace it? ').strip().lower()
    if response in ('y', 'yes'):
        print('ok, replacing...')
        return replace, True
    if response in ('both', 'corrupt', 'missing'):
        return response, True
    return replace, False
# example run: compare a trusted copy against a suspect one, asking for
# confirmation before each replacement -- edit both paths before running
goodpath = r'C:\path\with\confirmed\files'
suspiciouspath = r'C:\location\of\dubious\files'
ok, missing, corrupt = comparedirs(
    goodpath,
    suspiciouspath,
    replace='confirm',
)
# (web page footer captured along with the gist, not part of the script:
# "Sign up for free to join this conversation on GitHub.
# Already have an account? Sign in to comment")