#!/usr/bin/env python
"""Guard directory trees against bitrot using chkbit checksums and par2 parity files."""
from datetime import datetime
import subprocess
import getopt
import sys
import os
import re

# Status code recorded for each examined path; printed by show_results().
results = {}
# Directories to scan, collected from every -d option.
sources = []
# Only process the actual images inside Photos or Aperture libraries: any
# directory entry whose name appears here is skipped.  Kept as a list because
# -e/--exclude appends to it.
excluded_dirs = [
    "Thumbnails", "Previews", "Database", "resources", "Attachments",
    "ProjectCache", "iLifeShared", "iPod Photo Cache", "Caches", "Backup",
    "iMovie Cache", "iMovie Movie Cache", "iMovie Stabilization",
    "iMovie Thumbnails", "database", "private", "Data.noindex", "Data",
    "Contents", "Modified", "Apple TV Photo Cache", "Aperture.aplib",
]
# Empty when quiet; the rest of the script tests for the exact value "-v".
be_verbose = ""
# Verbosity switch interpolated into every par2 command line.
be_verbose_par2 = "-q"
# Value appended to the -r switch of "par2 create" (kept as a string).
redundancy_level = "10"
do_force = False
do_repair = False
do_update = False
do_recursive = True
do_missing = False
do_fast = False
# Number of files (or of directories, in fast mode) checked so far.
files_checked = 0
usage = """Usage: protect [options]
Options:
  -d directory        Directory to scan, can be specified multiple times
  -e,--exclude stem   Directory element (no path) to ignore, can be specified multiple times
  -F,--force          Update all checksums and parity files
  -f,--fast           Only check that each directory containing files has a .chkbit file
  -v,--verbose        Log everything
  -u,--update         Update the checksums and parity files for any changed files
  -r,--repair         Repair any bitrot detected
  -1                  Do not recurse into subdirectories
  -R percentage       Level of Redundancy (%). Default: 10
  -m,--missing        Add missing checksums and parity files
"""
# Get the command line options.
try:
    options, filenames = getopt.getopt(
        sys.argv[1:], '1ruvd:e:R:Ffm',
        ["force", "verbose", "update", "repair", "exclude=", "fast", "missing"])
except getopt.GetoptError as err:
    print(str(err))
    sys.exit(2)
for o, a in options:
    if o == '-d':
        sources.append(a)
    elif o in ('-F', '--force'):
        do_force = True
    elif o in ('-v', '--verbose'):
        # Always store the short form: the rest of the script compares
        # be_verbose against "-v" and passes be_verbose_par2 straight to
        # par2, so recording the literal "--verbose" silently disabled the
        # verbose output when the long option was used.
        be_verbose = "-v"
        be_verbose_par2 = "-v"
    elif o in ('-u', '--update'):
        do_update = True
    elif o in ('-m', '--missing'):
        do_missing = True
    elif o in ('-f', '--fast'):
        do_fast = True
    elif o in ('-r', '--repair'):
        do_repair = True
    elif o in ('-e', '--exclude'):
        excluded_dirs.append(a)
    elif o == '-R':
        redundancy_level = a
    elif o == '-1':
        do_recursive = False
    else:
        # Defensive: getopt normally rejects unknown options itself.  Exit
        # with the same failure status as the GetoptError path above.
        print("Unknown option: {0}".format(o))
        print(usage)
        sys.exit(2)
# Without at least one -d there is nothing to scan: show the help and stop.
if not sources:
    print(usage)
    sys.exit()
def file_with_path(path, filename):
    """Join *path* and *filename* into one path string for shell commands.

    Backticks in *filename* are backslash-escaped because the result is
    interpolated into double-quoted shell command lines, and doubled path
    separators are collapsed in the returned value.  A warning is printed
    when the joined path is neither an existing file nor an existing
    directory; the path is returned regardless.

    Args:
        path: Directory prefix; when empty or None, *filename* is used alone.
        filename: Entry name (or a complete path when *path* is empty).

    Returns:
        The joined path with backticks escaped.
    """
    escaped = filename.replace("`", "\\`")
    if path:
        on_disk = path + os.sep + filename
        entry_with_path = path + os.sep + escaped
    else:
        on_disk = filename
        entry_with_path = escaped
    # Probe the file system with the unescaped name: the escaped spelling
    # only exists for the shell, so testing it reported every name that
    # contains a backtick as missing.
    if not os.path.isfile(on_disk) and not os.path.isdir(on_disk):
        print("* Not found: " + entry_with_path)
    return entry_with_path.replace(os.sep + os.sep, os.sep)
def parity_file(path, filename):
    """Return the path of the par2 parity file that protects *filename*.

    The parity file lives next to the protected file and is named
    ``.protect.<name>.par2``.

    Args:
        path: Directory containing the file.  When empty or None,
            *filename* is treated as a complete path and split into its
            directory and final component.
        filename: File name, or a complete path when *path* is empty.

    Returns:
        The parity file path as produced by file_with_path().
    """
    target = filename
    if not path:
        # os.path.split yields the directory as a string.  Slicing the
        # result of str.split() produced a list, which raised TypeError as
        # soon as file_with_path() tried to concatenate it with a string.
        path, target = os.path.split(filename)
        print("* Reparsed {0} {1} {2}".format(filename, path, target))
    return file_with_path(path, ".protect.{0}.par2".format(target))
def delete_par2(path):
    """Remove checksum and parity files from *path* and everything below it.

    Every non-directory entry whose name starts with ``.protect`` or
    ``.chkbit`` is deleted (and announced on stdout).  Subdirectories are
    processed after the current directory has been fully listed.  Nothing
    happens when *path* is not a directory.
    """
    if not os.path.isdir(path):
        return
    subdirs = []
    for name in os.listdir(path):
        full_name = file_with_path(path, name)
        if os.path.isdir(full_name):
            subdirs.append(full_name)
            continue
        if name.startswith((".protect", ".chkbit")):
            print("Deleting " + full_name)
            os.remove(full_name)
    for subdir in subdirs:
        delete_par2(subdir)
def process_directory(path):
    """Classify the entries of *path*.

    Excluded names are recorded in ``results`` as ``"--"`` and have their
    checksum/parity files deleted.  Remaining directories are collected by
    full path, remaining dot-entries are ignored, regular files are
    collected by bare name, and anything else is recorded as ``"?"``.

    Returns:
        A ``(directories, files)`` tuple: full paths of the subdirectories
        and bare names of the regular files.
    """
    subdirs = []
    plain_files = []
    for name in os.listdir(path):
        full_name = file_with_path(path, name)
        if name in excluded_dirs:
            if be_verbose:
                print("Skipping " + full_name)
            delete_par2(full_name)
            results[full_name] = "--"
            continue
        if os.path.isdir(full_name):
            # Checked before the dot test, so hidden directories are kept.
            subdirs.append(full_name)
        elif name[0] == '.':
            # Hidden files (including .chkbit / .protect.*) are not tracked.
            pass
        elif os.path.isfile(full_name):
            plain_files.append(name)
        else:
            results[full_name] = '?'
            print("Unhandled " + full_name)
    return (subdirs, plain_files)
def repair_with_parity(path, filename):
    """Try to repair *filename* from its par2 parity file.

    Nothing is attempted unless repairs were requested (``do_repair``).

    Args:
        path: Directory containing the file, or None when *filename* is
            already a complete path.
        filename: Name (or complete path) of the damaged file.

    Returns:
        True when ``par2 repair`` exited successfully; False when repairs
        are disabled or the command failed (its output is then printed).
    """
    # Bail out before resolving any paths: nothing below is needed when
    # repairs are disabled.
    if not do_repair:
        return False
    parity = parity_file(path, filename)
    target = file_with_path(path, filename)
    print("\n + Repairing '{0}'".format(target))
    # The previous format string referenced {1} and {2} while supplying only
    # two arguments, so it raised IndexError before par2 was ever started.
    # It now mirrors the verify/create invocations: verbosity switch, parity
    # file, then the protected file.
    command = 'par2 repair {0} "{1}" "{2}"'.format(be_verbose_par2, parity, target)
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        print(" ! Parity repair failed: {0}".format(e))
        for line in e.output.split('\n'):
            print("Failed: " + line)
        return False
    return True
def update_parity(path, filename):
    """Create the par2 parity file for *filename* with ``par2 create``.

    Prints the parity file name in verbose mode, otherwise a progress dot.
    When par2 fails, the error and the "Target:" lines that do not end in
    "- found." plus any "Repair..." lines of its output are printed.
    """
    parity = parity_file(path, filename)
    target = file_with_path(path, filename)
    if be_verbose == "-v":
        print(" * Updating {0}".format(parity))
    else:
        sys.stdout.write('.')
        sys.stdout.flush()
    # -n1 : create only one recovery file. Has no effect on recoverability
    #       of the target file.
    # -rN : redundancy taken from redundancy_level (-R option).
    command = 'par2 create -n1 -r{0} {1} "{2}" "{3}"'.format(
        redundancy_level, be_verbose_par2, parity, target)
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        print(" ! Parity update failed: {0}".format(e))
        for line in e.output.split('\n'):
            problem_target = line.startswith("Target:") and not line.endswith("- found.")
            if problem_target or line.startswith("Repair"):
                print(line)
# Return codes of check_parity(): no parity file, verification failed,
# damaged but repairable, verified successfully.
par2_missing, par2_invalid, par2_repair, par2_valid = range(4)
def check_parity(path, filename):
    """Verify *filename* against its par2 parity file.

    Returns:
        par2_missing when there is no parity file (the file is recorded as
        "??" unless it is already recorded as "?"), par2_valid when
        ``par2 verify`` succeeds, par2_repair when par2 reports that repair
        is possible (recorded as "R"), and par2_invalid for any other
        verification failure.

    Raises:
        ValueError: par2 printed a "Repair is ..." line other than
            "Repair is required" / "Repair is possible".
    """
    global files_checked
    parity = parity_file(path, filename)
    target = file_with_path(path, filename)
    verbose = be_verbose == "-v"
    if not os.path.isfile(parity):
        if verbose:
            print(" * No parity file for '{0}'".format(target))
        if results.get(target) != "?":
            results[target] = "??"
        return par2_missing
    command = 'par2 verify {0} "{1}"'.format(be_verbose_par2, parity)
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        print(" ! Parity check failed: %s" % e)
        for line in e.output.split('\n'):
            if line.startswith("Target:") and not line.endswith("- found."):
                results[target] = "M"
                print(line)
            elif line.startswith("Repair is required"):
                # Informational only; the verdict is on a following line.
                pass
            elif line.startswith("Repair is possible"):
                results[target] = "R"
                return par2_repair
            elif line.startswith("Repair is"):
                raise ValueError("{0} for '{1}'".format(line, target))
            elif verbose:
                print("Debug: " + line)
        return par2_invalid
    # Only successful verifications are counted.
    files_checked = files_checked + 1
    return par2_valid
def process_chkbit(path, output):
    """Interpret the output of a chkbit run for directory *path*.

    Each line longer than two characters is read as a one-character status
    code, a separator character, and a path.  Paths containing an excluded
    component are recorded as "--" and cleaned up.  Codes "u"/"a" are
    recorded and returned as changed (their parity is refreshed when
    updating is enabled); codes "r"/"E" trigger a repair attempt; "?" is
    only recorded.

    Returns:
        The list of paths reported with code "u" or "a".  An empty list is
        returned immediately when the damaged entry is a .chkbit file.

    Raises:
        ValueError: a line could not be interpreted, or a damaged file
            could not be repaired.
    """
    changed = []
    invalid = False
    updated = False
    unhandled = False
    for line in output.split('\n'):
        if len(line) <= 2:
            continue
        code = line[0]
        entry_with_path = line[2:]
        # First path component that is on the exclusion list, if any.
        excluded = next(
            (part for part in entry_with_path.split(os.sep) if part in excluded_dirs),
            None)
        if excluded is not None:
            # TODO: Delete from the point at which the match was made
            delete_par2(entry_with_path)
            results[entry_with_path] = "--"
            if be_verbose:
                print("Skipping {0}: {1}".format(excluded, entry_with_path))
            continue
        if code in ('u', 'a'):
            results[entry_with_path] = code
            changed.append(entry_with_path)
            updated = True
            if do_update:
                update_parity(None, entry_with_path)
        elif code in ('r', 'E'):
            if line.find(".chkbit") > 0:
                # The checksum file itself is damaged: rebuild it or report
                # it, and stop interpreting the remaining output.
                if do_repair:
                    print(" + Repairing {0}/.chkbit".format(path))
                    subprocess.check_output('chkbit -force "{0}"'.format(path), stderr=subprocess.STDOUT, shell=True)
                else:
                    print(" ! Corrupted checksum file '{0}'".format(entry_with_path))
                    results[entry_with_path] = 'E'
                return []
            repaired = repair_with_parity(None, entry_with_path)
            results[entry_with_path] = code
            if not repaired:
                invalid = True
        elif code == '?':
            results[entry_with_path] = code
        elif line.find("with bitrot") > 0:
            # Summary line; nothing to record.
            pass
        else:
            print("Unhandled chkbit result: " + line)
            unhandled = True
    if updated:
        # Terminates the row of progress dots.
        print(" ")
    if unhandled:
        raise ValueError("Unhandled output from chkbit in '{0}'".format(path))
    if invalid:
        raise ValueError("Invalid hashes detected in '{0}'".format(path))
    return changed
def verify_directory(path):
    """Check (and optionally update or repair) the files in *path*.

    In fast mode only the presence of a ``.chkbit`` file is recorded for
    directories that contain files ("^" when present, "??" when missing).
    Otherwise the hashes are verified with ``chkbit -verify`` (or the
    checksum file is removed when both update and force are set), every
    file's parity is checked, missing parity files are created when
    -m or -u is set, repairable files are repaired when -r is set, and a
    missing ``.chkbit`` is created when -m is set.  Subdirectories are
    processed recursively unless -1 was given.

    Raises:
        ValueError: propagated from process_chkbit() / check_parity().
    """
    global files_checked
    changed = []
    (directories, files) = process_directory(path)
    chkbit_file = path + os.sep + ".chkbit"
    if do_fast:
        sys.stdout.write('.')
        sys.stdout.flush()
        files_checked = files_checked + 1
        if files:
            results[path + os.sep] = "^" if os.path.isfile(chkbit_file) else "??"
        # Honour -1 here as well; the fast path used to recurse
        # unconditionally, unlike the full scan below.
        if do_recursive:
            for entry in directories:
                verify_directory(entry)
        return
    print("[{0}] Protecting {1}...".format(datetime.strftime(datetime.now(), '%a %H:%M:%S'), path))
    if do_update and do_force:
        # Start again.  The checksum file may not exist yet (new or empty
        # directory); removing it unconditionally raised OSError and
        # aborted the whole run.
        if os.path.isfile(chkbit_file):
            os.remove(chkbit_file)
    elif files:
        # Look for changes to known files
        try:
            if be_verbose == "-v":
                print(" * Checking hashes")
            lines = subprocess.check_output('chkbit -verify "{0}"'.format(path), stderr=subprocess.STDOUT, shell=True)
            changed = process_chkbit(path, lines)
        except subprocess.CalledProcessError as e:
            print("Error: %s" % e)
            changed = process_chkbit(path, e.output)
    if files:
        # Check existing parity files
        updated = False
        for f in files:
            entry_with_path = file_with_path(path, f)
            # NOTE: "changed" may also list files from nested directories,
            # so this filter is unreliable; everything else is re-checked.
            if entry_with_path not in changed:
                rc = check_parity(path, f)
                if rc == par2_missing:
                    if do_missing or do_update:
                        update_parity(path, f)
                        updated = True
                        results[entry_with_path] = "A"
                elif do_repair and rc == par2_repair:
                    if repair_with_parity(path, f):
                        results[entry_with_path] = "F"
            else:
                print("Avoided duplicate scan for " + entry_with_path)
        if updated:
            # Terminates the row of progress dots.
            print(" ")
    if files and not os.path.isfile(chkbit_file):
        if do_missing:
            # Create the chkbit file if it was missing
            # But do it after we validated any existing parity files
            lines = subprocess.check_output('chkbit "{0}"'.format(path), stderr=subprocess.STDOUT, shell=True)
            changed = process_chkbit(path, lines)
        else:
            results[chkbit_file] = "??"
    if do_recursive:
        for entry in directories:
            verify_directory(entry)
def show_results():
    """Print a summary of ``results`` and exit with status 1 on errors.

    Informational entries are printed in sorted path order; entries whose
    code denotes a problem are collected and listed afterwards, followed by
    ``sys.exit(1)``.  Returns normally when no problems were recorded.
    """
    info_messages = {
        'u': " * '{0}' updated",
        '--': " '{0}' skipped",
        '^': " '{0}' tracked",
        'a': " + '{0}' added",
        'A': " ++ '{0}' created missing parity file",
        'F': " ! '{0}' fixed",
        '?': " ? '{0}' not tracked",
    }
    error_messages = {
        'E': " !! '{0}' corrupted checksum",
        'P': " !! '{0}' corrupted",
        'M': " !! '{0}' missing",
        'r': " !! '{0}' requires repair (hash)",
        'R': " !! '{0}' requires repair (parity)",
        '??': " ?? '{0}' missing parity file",
    }
    errors = []
    if results:
        # The counter holds directories in fast mode and files otherwise.
        if do_fast:
            header = "\n\nChecked {0} directories:"
        else:
            header = "\n\nChecked {0} files:"
        print(header.format(files_checked))
        for name in sorted(results):
            code = results[name]
            if code in info_messages:
                print(info_messages[code].format(name))
            elif code in error_messages:
                errors.append(name)
            else:
                print(" !! '{0}' unknown key {1}".format(name, code))
    if not errors:
        return
    print("Detected '{0}' errors".format(len(errors)))
    for name in errors:
        print(error_messages[results[name]].format(name))
    sys.exit(1)
# Scan every requested source directory.  A ValueError raised during the
# scan halts processing; the results gathered so far are still reported.
halted = False
try:
    for s in sources:
        verify_directory(s)
except ValueError as e:
    print("Processing halted: {0}".format(e))
    halted = True
# show_results() exits with status 1 itself when it lists errors.
show_results()
# A halted run must not look successful to the caller even when no
# individual error entry was recorded before the halt.
if halted:
    sys.exit(1)