# berdario/readrescue.py

## readrescue.py
import sys
import os

# This is basically shutil.copyfileobj, but it ignores I/O errors when reading.
# After doing a simple experiment with btrfs checksumming, I wondered whether it was possible
# to still read a file that gave an error with `cp`. The answer is obviously yes, since
# the checksum errors occur at the block level (which is why the chunksize is bsize).

# If you actually want to recover corrupted files from btrfs, without having to restore
# every single file in the fs, you might want to use instead:
# `btrfs restore -D -v /dev/whatever restore_dir` to list all the files that would've been restored
# and
# `btrfs restore /dev/whatever restore_dir --path-regex PartOfFileName`

# I got inspiration to try this out while reading https://sites.google.com/site/michelealessandrini74/an-experiment-on-data-corruption-and-recovery-with-btrfs

def copyrescue(infile, out, chunksize, replacement=b'\0'):
    """Copy ``infile`` to ``out`` chunk by chunk, tolerating read errors.

    Every chunk whose read raises ``OSError`` is replaced in the output by
    ``replacement * chunksize`` and skipped in the input with a relative
    seek, after which copying continues with the next chunk. A running error
    count is printed to stdout on a single line.

    Args:
        infile: Binary file object to read from. It must support
            ``seek(offset, 1)`` as soon as a read error occurs.
        out: Binary file object to write to.
        chunksize: Number of bytes requested per read and skipped per error.
        replacement: Bytes repeated ``chunksize`` times in place of a chunk
            that could not be read.

    Returns:
        The number of chunks whose read failed.

    Only exceptions raised by ``infile.read`` are tolerated; anything raised
    by ``out.write`` or ``infile.seek`` propagates to the caller.
    """
    errors = 0
    while True:
        # Only the read is guarded: a failing write (e.g. a full disk) must
        # propagate instead of being counted as a damaged input chunk.
        try:
            chunk = infile.read(chunksize)
        except OSError:
            errors += 1
            # Flush so the '\r'-rewritten counter is emitted right away
            # instead of sitting in the stdout buffer until the final newline.
            print('\rerrors:', errors, end='', flush=True)
            out.write(replacement * chunksize)
            infile.seek(chunksize, 1)
            continue
        # End of input is detected on the read itself, so the loop does not
        # depend on what ``out.write`` happens to return.
        if not chunk:
            break
        out.write(chunk)
    print()
    return errors


if __name__ == '__main__':
    # Usage: readrescue.py SOURCE DESTINATION
    # Fail with a readable message instead of an IndexError traceback when
    # an argument is missing (sys.exit with a string prints it to stderr).
    if len(sys.argv) < 3:
        sys.exit('usage: {} SOURCE DESTINATION'.format(sys.argv[0]))
    infile = sys.argv[1]
    # Copy in units of the block size that statvfs reports for the source path.
    chunksize = os.statvfs(infile).f_bsize
    print('chunksize:', chunksize)
    # Mode 'xb' makes open() fail if the destination already exists.
    with open(infile, 'rb') as f, open(sys.argv[2], 'xb') as out:
        copyrescue(f, out, chunksize)
	import sys
	import os

	# This is basically shutil.copyfileobj, but it ignores I/O errors when reading.
	# After doing a simple experiment with btrfs checksumming, I wondered whether it was possible
	# to still read a file that gave an error with `cp`. The answer is obviously yes, since
	# the checksum errors occur at the block level (which is why the chunksize is bsize).

	# If you actually want to recover corrupted files from btrfs, without having to restore
	# every single file in the fs, you might want to use instead:
	# `btrfs restore -D -v /dev/whatever restore_dir` to list all the files that would've been restored
	# and
	# `btrfs restore /dev/whatever restore_dir --path-regex PartOfFileName`

	# I got inspiration to try this out while reading https://sites.google.com/site/michelealessandrini74/an-experiment-on-data-corruption-and-recovery-with-btrfs

	def copyrescue(infile, out, chunksize, replacement=b'\0'):
	    """Copy ``infile`` to ``out`` chunk by chunk, tolerating read errors.

	    Every chunk whose read raises ``OSError`` is replaced in the output by
	    ``replacement * chunksize`` and skipped in the input with a relative
	    seek, after which copying continues with the next chunk. A running
	    error count is printed to stdout on a single line.

	    Args:
	        infile: Binary file object to read from. It must support
	            ``seek(offset, 1)`` as soon as a read error occurs.
	        out: Binary file object to write to.
	        chunksize: Number of bytes requested per read and skipped per error.
	        replacement: Bytes repeated ``chunksize`` times in place of a chunk
	            that could not be read.

	    Returns:
	        The number of chunks whose read failed.

	    Only exceptions raised by ``infile.read`` are tolerated; anything
	    raised by ``out.write`` or ``infile.seek`` propagates to the caller.
	    """
	    errors = 0
	    while True:
	        # Only the read is guarded: a failing write (e.g. a full disk) must
	        # propagate instead of being counted as a damaged input chunk.
	        try:
	            chunk = infile.read(chunksize)
	        except OSError:
	            errors += 1
	            # Flush so the '\r'-rewritten counter is emitted right away
	            # instead of sitting in the stdout buffer until the final newline.
	            print('\rerrors:', errors, end='', flush=True)
	            out.write(replacement * chunksize)
	            infile.seek(chunksize, 1)
	            continue
	        # End of input is detected on the read itself, so the loop does not
	        # depend on what ``out.write`` happens to return.
	        if not chunk:
	            break
	        out.write(chunk)
	    print()
	    return errors


	if __name__ == '__main__':
	    # Usage: readrescue.py SOURCE DESTINATION
	    # Fail with a readable message instead of an IndexError traceback when
	    # an argument is missing (sys.exit with a string prints it to stderr).
	    if len(sys.argv) < 3:
	        sys.exit('usage: {} SOURCE DESTINATION'.format(sys.argv[0]))
	    infile = sys.argv[1]
	    # Copy in units of the block size that statvfs reports for the source path.
	    chunksize = os.statvfs(infile).f_bsize
	    print('chunksize:', chunksize)
	    # Mode 'xb' makes open() fail if the destination already exists.
	    with open(infile, 'rb') as f, open(sys.argv[2], 'xb') as out:
	        copyrescue(f, out, chunksize)