Skip to content

Instantly share code, notes, and snippets.

@beekhof
Created January 9, 2017 11:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save beekhof/8977becd2b12c4ac0c515efb145add30 to your computer and use it in GitHub Desktop.
Save beekhof/8977becd2b12c4ac0c515efb145add30 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from datetime import datetime
import subprocess
import getopt
import sys
import os
import re
# --- Shared state filled in while scanning ---------------------------------
# Maps a path to a short status code; show_results() turns the codes into text.
results = {}
# Directories named with -d on the command line.
sources = []
# Number of files (or directories, in fast mode) that have been checked.
files_checked = 0

# Directory element names that are never protected.  The defaults keep the
# scan to the actual images inside Photos or Aperture libraries.
excluded_dirs = [
    "Thumbnails",
    "Previews",
    "Database",
    "resources",
    "Attachments",
    "ProjectCache",
    "iLifeShared",
    "iPod Photo Cache",
    "Caches",
    "Backup",
    "iMovie Cache",
    "iMovie Movie Cache",
    "iMovie Stabilization",
    "iMovie Thumbnails",
    "database",
    "private",
    "Data.noindex",
    "Data",
    "Contents",
    "Modified",
    "Apple TV Photo Cache",
    "Aperture.aplib",
]

# --- Settings overridden by command line options ---------------------------
# Verbosity flag for this script ("" means quiet) and the one handed to par2.
be_verbose = ""
be_verbose_par2 = "-q"
# Redundancy percentage passed to "par2 create -r".
redundancy_level = "10"
do_force = False
do_repair = False
do_update = False
do_recursive = True
do_missing = False
do_fast = False
# Help text printed when no directory is given or an option is not understood.
# Every option accepted by the getopt call must be listed here; -f/--fast was
# previously accepted but undocumented.
usage = """Usage: protect [options]
Options:
-d directory Directory to scan, can be specified multiple times
-e,--exclude stem Directory element (no path) to ignore, can be specified multiple times
-F,--force Update all checksums and parity files
-v,--verbose Log everything
-u,--update Update the checksums and parity files for any changed files
-r,--repair Repair any bitrot detected
-1 Do not recurse into subdirectories
-R percentage Level of Redundancy (%). Default: 10
-m,--missing Add missing checksums and parity files
-f,--fast Only report which directories have a checksum file, without verifying hashes or parity
"""
# Get the command line options.
try:
    options, filenames = getopt.getopt(
        sys.argv[1:], '1ruvd:e:R:Ffm',
        ["force", "verbose", "update", "repair", "exclude=", "fast", "missing"])
except getopt.GetoptError as err:
    # Unknown option or missing argument: say what was wrong and how to call us.
    print(str(err))
    print(usage)
    sys.exit(2)

for o, a in options:
    if o == '-d':
        sources.append(a)
    elif o in ('-F', '--force'):
        do_force = True
    elif o in ('-v', '--verbose'):
        # Always store the short form: the rest of the script compares
        # be_verbose against "-v", and be_verbose_par2 is pasted verbatim into
        # the par2 command line, so the long spelling must not leak through.
        be_verbose = "-v"
        be_verbose_par2 = "-v"
    elif o in ('-u', '--update'):
        do_update = True
    elif o in ('-m', '--missing'):
        do_missing = True
    elif o in ('-f', '--fast'):
        do_fast = True
    elif o in ('-r', '--repair'):
        do_repair = True
    elif o in ('-e', '--exclude'):
        excluded_dirs.append(a)
    elif o == '-R':
        redundancy_level = a
    elif o == '-1':
        do_recursive = False
    else:
        # Defensive only: getopt rejects options it was not told about.
        print("Unknown option: {0}".format(o))
        print(usage)
        sys.exit()

# Nothing to do without at least one -d directory.
if not sources:
    print(usage)
    sys.exit()
def file_with_path(path, filename):
    """Join *path* and *filename* into a single path string.

    Every backtick in *filename* is prefixed with a backslash before joining.
    A "* Not found" line is printed when the joined name is neither an
    existing file nor an existing directory; the name is returned regardless.

    path     -- directory part; may be empty or None, in which case the
                (escaped) filename is used on its own
    filename -- entry name to append

    Returns the joined path with doubled separators collapsed to one.
    """
    # A plain substring replacement; no regular expression is needed, and the
    # former "\`" pattern literal was an invalid string escape sequence.
    entry = filename.replace("`", "\\`")
    if path:
        entry_with_path = path + os.sep + entry
    else:
        entry_with_path = entry
    if not os.path.isfile(entry_with_path) and not os.path.isdir(entry_with_path):
        print("* Not found: " + entry_with_path)
    # A path ending in a separator would otherwise yield "dir//name".
    return entry_with_path.replace(os.sep + os.sep, os.sep)
def parity_file(path, filename):
    """Return the path of the par2 parity file that protects *filename*.

    The parity file lives next to its target and is named
    ".protect.<target>.par2".

    path     -- directory holding the file; when empty or None, *filename* is
                taken to be a full path and is split into directory and name
    filename -- file name, or a full path when *path* is not given
    """
    target = filename
    if not path:
        # Keep the directory as a string: slicing the split() result produced
        # a list, which cannot be concatenated with os.sep in file_with_path.
        path = os.path.dirname(filename)
        target = os.path.basename(filename)
        print("* Reparsed {0} {1} {2}".format(filename, path, target))
    return file_with_path(path, ".protect.{0}.par2".format(target))
def delete_par2(path):
    """Remove every ".protect*" and ".chkbit*" file below *path*.

    Does nothing when *path* is not a directory.  Files of the current
    directory are removed first; subdirectories are visited afterwards.
    """
    if not os.path.isdir(path):
        return

    subdirs = []
    for name in os.listdir(path):
        full_name = file_with_path(path, name)
        if os.path.isdir(full_name):
            # Remember it and descend once this level has been cleaned.
            subdirs.append(full_name)
            continue
        if name.startswith((".protect", ".chkbit")):
            print("Deleting " + full_name)
            os.remove(full_name)

    for subdir in subdirs:
        delete_par2(subdir)
def process_directory(path):
    """Sort the entries of *path* into subdirectories and regular files.

    Entries whose name is in excluded_dirs are recorded as skipped ("--") and
    have their parity/checksum files deleted.  Non-directory entries starting
    with a dot are ignored.  Anything that is neither a file nor a directory is
    recorded as '?' and reported.

    Returns a tuple (directories, files): directories holds full paths, files
    holds bare names.
    """
    subdirs = []
    plain_files = []
    for name in os.listdir(path):
        full_name = file_with_path(path, name)

        if name in excluded_dirs:
            if be_verbose:
                print("Skipping " + full_name)
            delete_par2(full_name)
            results[full_name] = "--"
            continue

        # Directories are tested before the dot check, so hidden directories
        # are still descended into.
        if os.path.isdir(full_name):
            subdirs.append(full_name)
            continue

        if name.startswith('.'):
            continue

        if os.path.isfile(full_name):
            plain_files.append(name)
            continue

        results[full_name] = '?'
        print("Unhandled " + full_name)

    return (subdirs, plain_files)
def repair_with_parity(path, filename):
    """Try to repair *filename* from its par2 parity file.

    path     -- directory of the file, or None when *filename* is a full path
    filename -- file to repair

    Returns True when par2 reports success.  Returns False when repairs are
    not enabled (do_repair is unset) or when par2 exits with an error, in
    which case its output is printed line by line.
    """
    if not do_repair:
        return False

    parity = parity_file(path, filename)
    target = file_with_path(path, filename)
    print("\n + Repairing '{0}'".format(target))
    try:
        # The placeholders must match the three arguments: verbosity flag,
        # parity file, target.  The previous string referenced {1} and {2}
        # while only two values were supplied, so format() raised IndexError.
        # NOTE(review): names are interpolated into a shell string inside
        # double quotes; this appears to rely on file_with_path escaping
        # backticks, and other shell metacharacters are not handled.
        subprocess.check_output(
            'par2 repair {0} "{1}" "{2}"'.format(be_verbose_par2, parity, target),
            stderr=subprocess.STDOUT, shell=True)
        return True
    except subprocess.CalledProcessError as e:
        print(" ! Parity repair failed: {0}".format(e))
        for line in e.output.split('\n'):
            print("Failed: " + line)
        return False
def update_parity(path, filename):
    """Create the par2 parity file for *filename* by running "par2 create".

    path     -- directory of the file, or None when *filename* is a full path
    filename -- file to protect

    Progress is shown as a message in verbose mode and as a single dot
    otherwise.  A par2 failure is reported but not raised.
    """
    parity = parity_file(path, filename)
    target = file_with_path(path, filename)

    if be_verbose == "-v":
        print(" * Updating {0}".format(parity))
    else:
        sys.stdout.write('.')
        sys.stdout.flush()

    # -n1  : create only one recovery file. Has no effect on recoverability of
    #        the target file.
    # -r<N>: tolerate up to N% of the file being corrupted (redundancy_level).
    command = 'par2 create -n1 -r{0} {1} "{2}" "{3}"'.format(
        redundancy_level, be_verbose_par2, parity, target)
    try:
        subprocess.check_output(command, stderr=subprocess.STDOUT, shell=True)
    except subprocess.CalledProcessError as e:
        print(" ! Parity update failed: {0}".format(e))
        for line in e.output.split('\n'):
            # Only show targets that were not found, and any repair hints.
            bad_target = line.startswith("Target:") and not line.endswith("- found.")
            if bad_target or line.startswith("Repair"):
                print(line)
# Return codes of check_parity(), numbered 0..3 in this order:
# no parity file, verification failed, repairable damage, verified OK.
par2_missing, par2_invalid, par2_repair, par2_valid = range(4)
def check_parity(path, filename):
    """Verify *filename* against its par2 parity file.

    path     -- directory of the file, or None when *filename* is a full path
    filename -- file to verify

    Returns one of:
      par2_missing -- there is no parity file (recorded as "??" unless the
                      entry is already recorded as "?")
      par2_valid   -- "par2 verify" succeeded
      par2_repair  -- par2 reports that repair is possible (recorded as "R")
      par2_invalid -- verification failed for another reason (recorded as "M"
                      for a target that was not found, otherwise "P")

    Raises ValueError for any other "Repair is ..." verdict from par2.
    """
    global files_checked
    parity = parity_file(path, filename)
    entry_with_path = file_with_path(path, filename)

    if not os.path.isfile(parity):
        if be_verbose == "-v":
            print(" * No parity file for '{0}'".format(entry_with_path))
        # Keep an existing "?" marker; anything else becomes "??".
        if results.get(entry_with_path) != "?":
            results[entry_with_path] = "??"
        return par2_missing

    try:
        subprocess.check_output(
            'par2 verify {0} "{1}"'.format(be_verbose_par2, parity),
            stderr=subprocess.STDOUT, shell=True)
        files_checked = files_checked + 1
    except subprocess.CalledProcessError as e:
        print(" ! Parity check failed: %s" % e)
        recorded = False
        for line in e.output.split('\n'):
            if line.startswith("Target:") and not line.endswith("- found."):
                results[entry_with_path] = "M"
                recorded = True
                print(line)
            elif line.startswith("Repair is required"):
                # Informational only; the verdict follows on a later line.
                continue
            elif line.startswith("Repair is possible"):
                results[entry_with_path] = "R"
                return par2_repair
            elif line.startswith("Repair is"):
                raise ValueError("{0} for '{1}'".format(line, entry_with_path))
            elif be_verbose == "-v":
                print("Debug: " + line)
        if not recorded:
            # Without a result the failure would vanish from the summary and
            # from the exit status.  'P' is the code show_results reports as
            # corrupted; nothing else sets it.
            results[entry_with_path] = "P"
        return par2_invalid

    return par2_valid
def process_chkbit(path, output):
    """Interpret the output of a chkbit run over *path*.

    Each line longer than two characters is read as a one-character status
    code (first character) followed by a path (from the third character on).

    path   -- directory chkbit was run on; used for messages and for
              rebuilding a corrupted .chkbit file
    output -- text printed by chkbit

    Returns the list of paths chkbit reported as updated ('u') or added ('a').
    Returns an empty list as soon as a problem with a .chkbit file itself is
    reported.

    Raises ValueError when a line cannot be interpreted, or when a damaged
    file could not be repaired from parity.
    """
    changed = []
    invalid = False
    updated = False
    unhandled = False

    for line in output.split('\n'):
        if len(line) <= 2:
            continue
        code = line[0]
        entry_with_path = line[2:]

        # Ignore anything that lives below an excluded directory element.
        excluded = [e for e in entry_with_path.split(os.sep) if e in excluded_dirs]
        if excluded:
            # TODO: Delete from the point at which the match was made
            delete_par2(entry_with_path)
            results[entry_with_path] = "--"
            if be_verbose:
                print("Skipping {0}: {1}".format(excluded[0], entry_with_path))
            continue

        if code in ('u', 'a'):
            results[entry_with_path] = code
            changed.append(entry_with_path)
            updated = True
            if do_update:
                update_parity(None, entry_with_path)
        elif code in ('r', 'E'):
            if ".chkbit" in line:
                # The checksum file itself is damaged.
                if do_repair:
                    print(" + Repairing {0}/.chkbit".format(path))
                    subprocess.check_output(
                        'chkbit -force "{0}"'.format(path),
                        stderr=subprocess.STDOUT, shell=True)
                else:
                    print(" ! Corrupted checksum file '{0}'".format(entry_with_path))
                    results[entry_with_path] = 'E'
                return []
            if repair_with_parity(None, entry_with_path):
                # Record a successful repair as fixed, the same code
                # verify_directory uses, instead of leaving the error code in
                # place and having the summary count it as a failure.
                results[entry_with_path] = "F"
            else:
                results[entry_with_path] = code
                invalid = True
        elif code == '?':
            results[entry_with_path] = code
        elif line.find("with bitrot") > 0:
            # Lines mentioning "with bitrot" need no action of their own.
            continue
        else:
            print("Unhandled chkbit result: " + line)
            unhandled = True

    if updated:
        # Terminates the row of progress dots printed by update_parity.
        print(" ")
    if unhandled:
        raise ValueError("Unhandled output from chkbit in '{0}'".format(path))
    if invalid:
        raise ValueError("Invalid hashes detected in '{0}'".format(path))
    return changed
def verify_directory(path):
    """Check, and optionally update or repair, the protection of *path*.

    In fast mode only the presence of a .chkbit file is recorded for each
    directory that contains files.  Otherwise chkbit is run to detect changed
    files, every file's parity is checked with par2, and missing pieces are
    created when the corresponding options are set.  Subdirectories are
    processed recursively unless recursion is disabled.

    Raises ValueError (from process_chkbit / check_parity) when processing
    has to stop.
    """
    global files_checked
    changed = []
    chkbit_file = path + os.sep + ".chkbit"
    (directories, files) = process_directory(path)

    if do_fast:
        sys.stdout.write('.')
        sys.stdout.flush()
        files_checked = files_checked + 1
        if files:
            if os.path.isfile(chkbit_file):
                results[path + os.sep] = "^"
            else:
                results[path + os.sep] = "??"
        # Honour -1 here as well; fast mode used to recurse unconditionally.
        if do_recursive:
            for entry in directories:
                verify_directory(entry)
        return

    print("[{0}] Protecting {1}...".format(datetime.now().strftime('%a %H:%M:%S'), path))

    if do_update and do_force:
        # Start again.  The checksum file may legitimately be absent (a
        # directory never seen before, or one without files), so only remove
        # it when it exists instead of crashing with OSError.
        if os.path.isfile(chkbit_file):
            os.remove(chkbit_file)
    elif files:
        # Look for changes to known files
        try:
            if be_verbose == "-v":
                print(" * Checking hashes")
            lines = subprocess.check_output(
                'chkbit -verify "{0}"'.format(path),
                stderr=subprocess.STDOUT, shell=True)
            changed = process_chkbit(path, lines)
        except subprocess.CalledProcessError as e:
            print("Error: %s" % e)
            changed = process_chkbit(path, e.output)

    if files:
        # Check existing parity files
        updated = False
        for f in files:
            entry_with_path = file_with_path(path, f)
            if entry_with_path in changed:
                print("Avoided duplicate scan for " + entry_with_path)
                continue
            rc = check_parity(path, f)
            if rc == par2_missing:
                if do_missing or do_update:
                    update_parity(path, f)
                    updated = True
                    results[entry_with_path] = "A"
            elif do_repair and rc == par2_repair:
                if repair_with_parity(path, f):
                    results[entry_with_path] = "F"
        if updated:
            # Terminates the row of progress dots printed by update_parity.
            print(" ")

    if files and not os.path.isfile(chkbit_file):
        if do_missing:
            # Create the chkbit file if it was missing
            # But do it after we validated any existing parity files
            lines = subprocess.check_output(
                'chkbit "{0}"'.format(path),
                stderr=subprocess.STDOUT, shell=True)
            changed = process_chkbit(path, lines)
        else:
            results[chkbit_file] = "??"

    if do_recursive:
        for entry in directories:
            verify_directory(entry)
def show_results():
    """Print a summary of everything recorded in results.

    Informational entries are listed first, in path order, followed by the
    collected errors.  Exits the program with status 1 when at least one
    error was recorded.
    """
    # Status code -> message for entries that are not errors.
    info_messages = {
        'u': " * '{0}' updated",
        '--': " '{0}' skipped",
        '^': " '{0}' tracked",
        'a': " + '{0}' added",
        'A': " ++ '{0}' created missing parity file",
        'F': " ! '{0}' fixed",
        '?': " ? '{0}' not tracked",
    }
    # Status code -> message for entries that count as errors.
    error_messages = {
        'E': " !! '{0}' corrupted checksum",
        'P': " !! '{0}' corrupted",
        'M': " !! '{0}' missing",
        'r': " !! '{0}' requires repair (hash)",
        'R': " !! '{0}' requires repair (parity)",
        '??': " ?? '{0}' missing parity file",
    }

    failed = []
    if results:
        if do_fast:
            header = "\n\nChecked {0} directories:"
        else:
            header = "\n\nChecked {0} files:"
        print(header.format(files_checked))

        for name in sorted(results):
            code = results[name]
            if code in info_messages:
                print(info_messages[code].format(name))
            elif code in error_messages:
                # Errors are shown together after the informational lines.
                failed.append(name)
            else:
                print(" !! '{0}' unknown key {1}".format(name, code))

    if failed:
        print("Detected '{0}' errors".format(len(failed)))
        for name in failed:
            print(error_messages[results[name]].format(name))
        sys.exit(1)
# Process every requested directory.  A ValueError stops the scan early, but
# whatever was collected up to that point is still summarised.
halted = False
try:
    for s in sources:
        verify_directory(s)
except ValueError as e:
    halted = True
    print("Processing halted: {0}".format(e))
show_results()
# show_results() only exits non-zero when it has errors to list; a halted run
# must not report success either.
if halted:
    sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment