Skip to content

Instantly share code, notes, and snippets.

@glasserc
Created September 14, 2012 02:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glasserc/3719406 to your computer and use it in GitHub Desktop.
Save glasserc/3719406 to your computer and use it in GitHub Desktop.
Quick hack to fix my git-annex repo
#! /usr/bin/python
# Quick hack to find and rearrange files from a broken git-annex.
#
# One of my drives with a git annex suffered major filesystem
# corruption, and pretty much all the files got dumped into
# lost+found. Fortunately git-annex symlinks contain all the
# information you need to find your files and see if they're still OK
# -- to wit, the filesize and a SHA.
import argparse
import hashlib
import os
import shutil
import stat
import subprocess
import sys
# Permission masks built from the stat module's mode bits:
# read for user/group/other, and execute (search, on a directory) for
# user/group/other.
ALL_READ = stat.S_IROTH | stat.S_IRGRP | stat.S_IRUSR
ALL_EXC = stat.S_IXOTH | stat.S_IXGRP | stat.S_IXUSR
class Main(object):
    """Re-attach lost files to the broken git-annex symlinks that name them.

    Every broken symlink under ``linksdir`` is parsed as a git-annex key
    (backend, size, checksum). Regular files of exactly that size are
    looked up under ``filesdir`` and checksummed; the first one whose
    digest equals the key's is moved to the location the symlink points at.
    """

    # git-annex backend name -> hashlib algorithm name. A backend with a
    # trailing "E" (e.g. SHA256E) uses the same digest but appends the
    # file's extension to the key name.
    HASH_ALGORITHMS = {
        'MD5': 'md5',
        'SHA1': 'sha1',
        'SHA224': 'sha224',
        'SHA256': 'sha256',
        'SHA384': 'sha384',
        'SHA512': 'sha512',
    }

    # Number of bytes read per iteration while checksumming a candidate.
    CHUNK_SIZE = 1 << 20

    def __init__(self, argv=None):
        """Parse the command line.

        argv -- list of argument strings; None makes argparse read
                sys.argv[1:].
        """
        self.parser = argparse.ArgumentParser(
            description='find the files pointed to by broken git-annex links')
        self.parser.add_argument(
            'linksdir', type=str,
            help='directory where all the broken links are')
        self.parser.add_argument(
            'filesdir', type=str,
            help='directory where all the lost+found files are')
        self.options = self.parser.parse_args(argv)

    @staticmethod
    def _split_nul(output):
        """Split the output of ``find ... -print0`` into a list of paths.

        Empty output yields an empty list rather than [''], so callers
        never try to process a path named ''.
        """
        if not isinstance(output, str):
            # Python 3: check_output returned bytes. (On Python 2, str is
            # the byte string type, so this branch is never taken.)
            output = os.fsdecode(output)
        return [path for path in output.split('\0') if path]

    @classmethod
    def _hash_algorithm(cls, backend):
        """Return the hashlib algorithm name for backend, or None."""
        if backend in cls.HASH_ALGORITHMS:
            return cls.HASH_ALGORITHMS[backend]
        if backend.endswith('E'):
            return cls.HASH_ALGORITHMS.get(backend[:-1])
        return None

    @classmethod
    def _file_digest(cls, path, algorithm):
        """Return the hex digest of the file at path, read in chunks."""
        digest = hashlib.new(algorithm)
        with open(path, 'rb') as stream:
            for chunk in iter(lambda: stream.read(cls.CHUNK_SIZE), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def find_broken_links(self):
        """Return the paths of all broken symlinks under linksdir.

        With -L, find follows symlinks, so '-type l' only matches links
        whose target could not be resolved.
        """
        find_output = subprocess.check_output(
            ['find', '-L', self.options.linksdir, '-type', 'l', '-print0'])
        return self._split_nul(find_output)

    def parse_annex_symlink(self, link):
        """Decode the git-annex key in the target of the symlink at link.

        Returns a dict with the keys 'backend', 'size' (decimal string)
        and 'hash'. The key is expected to look like
        BACKEND-sSIZE[-other fields]--NAME; fields other than the size
        are ignored. For the "E" variant of a known hash backend, the
        extension that follows the digest in NAME is dropped.

        Raises ValueError if the link target is not a key of that shape.
        """
        target = os.readlink(link)
        key = os.path.basename(target)
        # Only the first '--' separates the fields from the name; the name
        # itself is kept whole.
        fields, separator, name = key.partition('--')
        if not separator:
            raise ValueError('not a git-annex key: {0!r}'.format(key))
        parts = fields.split('-')
        backend = parts[0]
        size = None
        for part in parts[1:]:
            if part.startswith('s'):
                size = part[1:]
        if size is None or not size.isdigit():
            raise ValueError('no size field in git-annex key: {0!r}'.format(key))
        if backend.endswith('E') and backend[:-1] in self.HASH_ALGORITHMS:
            # A hex digest never contains a dot, so everything from the
            # first dot onwards is the preserved file extension.
            name = name.split('.', 1)[0]
        return {'hash': name, 'backend': backend, 'size': size}

    def _move_into_place(self, found, dest):
        """Move found to dest, then make dest and its directory read-only.

        Owner write access is added temporarily to both directories so the
        move can succeed; the source directory gets its previous mode back
        afterwards.
        """
        dest_dir = os.path.dirname(dest)
        source_dir = os.path.dirname(found)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        source_mode = stat.S_IMODE(os.stat(source_dir).st_mode)
        dest_mode = stat.S_IMODE(os.stat(dest_dir).st_mode)
        os.chmod(dest_dir, dest_mode | stat.S_IRWXU)
        os.chmod(source_dir, source_mode | stat.S_IRWXU)
        try:
            shutil.move(found, dest)
            os.chmod(dest, ALL_READ)
        finally:
            os.chmod(source_dir, source_mode)
            os.chmod(dest_dir, ALL_READ | ALL_EXC)

    def matchup_source(self, link):
        """Find the file that belongs to link and move it into place.

        Prints a warning and returns without moving anything when the
        link is not a usable git-annex key, when its backend has no known
        checksum algorithm, or when no candidate has the right checksum.
        """
        try:
            info = self.parse_annex_symlink(link)
        except ValueError as err:
            print("WARNING: skipping {0}: {1}".format(link, err))
            return
        algorithm = self._hash_algorithm(info['backend'])
        if algorithm is None:
            print("WARNING: skipping {0}: unsupported backend {1}".format(
                link, info['backend']))
            return

        # '-size Nc' selects files of exactly N bytes, so only plausible
        # candidates have to be checksummed.
        possibles = self._split_nul(subprocess.check_output(
            ['find', self.options.filesdir, '-type', 'f',
             '-size', info['size'] + 'c', '-print0']))
        if len(possibles) != 1:
            print("WARNING: {0} matches for {1} with size {2}: {3}".format(
                len(possibles), link, info['size'], possibles))

        found = None
        for possible in possibles:
            try:
                digest = self._file_digest(possible, algorithm)
            except EnvironmentError as err:
                # An unreadable candidate should not abort the whole run.
                print("WARNING: cannot read {0}: {1}".format(possible, err))
                continue
            if digest == info['hash']:
                found = possible
                break
            print("WARNING: hash doesn't match up for {0} ({1}) and {2} ({3})".format(
                link, info['hash'], possible, digest))

        if found is None:
            print("No match found for {0}".format(link))
            return

        dest = os.path.abspath(
            os.path.join(os.path.dirname(link), os.readlink(link)))
        self._move_into_place(found, dest)

    def run(self):
        """Try to restore the content behind every broken link in linksdir."""
        for link in self.find_broken_links():
            self.matchup_source(link)
# Script entry point: parse sys.argv and process every broken link.
if __name__ == '__main__':
    app = Main()
    app.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment