Skip to content

Instantly share code, notes, and snippets.

@strayge
Last active December 11, 2018 07:24
Show Gist options
  • Save strayge/076c0dfac32fe242fbf61550da4d705b to your computer and use it in GitHub Desktop.
Save strayge/076c0dfac32fe242fbf61550da4d705b to your computer and use it in GitHub Desktop.
Recover files from damaged disk using The Sleuth Kit and ddrescue (GNU)
#!/usr/bin/python3
# Script that combines The Sleuth Kit and GNU ddrescue.
# It makes it possible to restore only the needed files from a damaged drive.
import argparse
import json
import os
import subprocess
import time
# Command-line interface. argparse treats every option as optional; --image
# is validated right after parsing because its value is interpolated into
# each fls / istat / icat command this script runs.
parser = argparse.ArgumentParser(description='Script for combine The Sleuth Kit and ddrescue (GNU)')
parser.add_argument('--inode', '-i', help='start inode, if not specified - starts from root')
parser.add_argument('--image', '-im', help='/path/to/sdxX.img')
parser.add_argument('--device', '-d', help='/dev/sdxX')
parser.add_argument('--log', '-l', help='/path/to/sdxX.log')
parser.add_argument('--recursive', '-r', action='store_true', help='recursive from selected inode')
parser.add_argument('--restore', '-w', action='store_true', help='restore files to "out" folder')
parser.add_argument('--generate', '-g', action='store_true', help='generate commands for ddrescue')
parser.add_argument('--stats', '-s', action='store_true', help='show statistic by extensions')
parser.add_argument('--dump-sectors', action='store_true', help='save sectors for finded files to sectors.txt')
parser.add_argument('--dump-blocks', action='store_true', help='save blocks for finded files to blocks.txt')
parser.add_argument('--type', '-t', help='filter for extensions: images, docs, archives, video, audio')
parser.add_argument('--follow', action='store_true', help='NOT IMPLEMENTED')
parser.add_argument('--restore_with_log', action='store_true', help='skip files with missed sectors in ddrescure log')
args = parser.parse_args()

# Short aliases used throughout the rest of the script
device = args.device
image = args.image
log = args.log

if not image:
    # parser.error() prints the usage line plus this message to stderr and
    # exits with status 2, so a calling shell script can detect the failure
    # (print() followed by a bare exit() reported success).
    parser.error('--image required')
# Build the fls command that lists the files to work on, for example:
#   fls -f ntfs /mnt/hdd/sdb5.raw 108736-144-7
# -p makes fls print full paths; -r (only with --recursive) descends into
# sub-directories. Without --inode fls starts from the root directory.
command_line = 'fls -f ntfs -p'
args.inode = args.inode or ''
if args.recursive:
    command_line += ' -r'
command_line += ' %s %s' % (image, args.inode)

if args.follow:
    # TODO: run the command above with ' -v', collect every raw_read line
    # from its output and print ddrescue commands for those blocks. That
    # should help to start recovering from the hdd before TSK is able to
    # read the file list from the FS (MFT sectors and the other parts the
    # filesystem itself needs).
    #
    # SystemExit with a string writes it to stderr and exits with status 1.
    raise SystemExit('--follow not implemented (hdd permanently died while coding this part =(')
# Run fls through the shell and keep its listing as text; bytes that are not
# valid UTF-8 are dropped instead of raising.
print('Reading file list...')
cmd_output = subprocess.check_output(command_line, shell=True).decode('utf-8', errors='ignore')

# Timestamp of the last progress message; 0 makes the first check fire at once
last_time_output = 0
# Deleted files get this directory prepended to their output path
out_prefix_for_deleted_files = '____REMOVED____'

print('Parsing file list...')
# Lower-case file-name suffixes grouped by category. The category names are
# the values accepted by --type; each value is a tuple so it can be passed
# straight to str.endswith().
extensions_types = dict(
    images=(
        '.jpeg', '.jpg', '.png', '.tiff', '.gif', '.cr2',
        '.tif', '.bmp', '.psd', '.crw', '.tga', '.svg',
    ),
    docs=(
        '.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.rtf',
        '.djvu', '.pdf', '.fb2', '.txt', '.pptm', '.xlsm', '.pps',
        '.ppsx', '.ppsm', '.pot', '.potx', '.odt', '.vsd', '.vsdx',
    ),
    archives=('.zip', '.rar', '.7z'),
    video=(
        '.mov', '.avi', '.mkv', '.mpg', '.mpeg', '.mp4',
        '.flv', '.3gp', '.3gpp', '.wmv', '.webm',
    ),
    audio=('.mp3', '.ogg', '.wav', '.cue', '.flac', '.wma', '.amr'),
)
# Reject an unknown --type before parsing starts: the extension filter
# indexes extensions_types with this value. parser.error() writes to stderr
# and exits with status 2 (the previous print() + exit() reported success
# and did not say which values are valid).
if args.type and args.type not in extensions_types:
    parser.error('Unknown type %r, expected one of: %s' % (args.type, ', '.join(extensions_types)))
# Counters for --stats: files per known category / per unrecognised extension
ext_stats = {}
ext_unknown = {}
# Maps inode address -> relative output path of every file selected so far
files_in_dir = {}

for entry in cmd_output.split('\n'):
    # Keep only entries whose part before the first ':' contains '/r'
    # (presumably regular files such as "r/r" -- verify against fls output)
    meta, colon, raw_name = entry.partition(':')
    if not colon or '/r' not in meta:
        continue
    filename = raw_name.strip()
    lowered = filename.lower()

    if args.stats:
        # first category whose suffix tuple matches, or None
        category = next(
            (name for name, suffixes in extensions_types.items() if lowered.endswith(suffixes)),
            None,
        )
        if category is not None:
            ext_stats[category] = ext_stats.get(category, 0) + 1
        elif '.' in filename[-15:]:
            # unknown extension; counted only when a dot sits within the
            # last 15 characters of the name
            unknown = lowered.split('.')[-1]
            ext_unknown[unknown] = ext_unknown.get(unknown, 0) + 1

    if args.type:
        # skip special dirs
        if filename.startswith('$'):
            continue
        # process only selected extensions
        if not lowered.endswith(extensions_types[args.type]):
            continue

    # ':' shows up for some ntfs streams (maybe just skip them?) and '`' is
    # a special char; both are replaced in the output name
    filename = filename.replace(':', '_').replace('`', '_')

    # text between the first '/r' and the first ':' is the inode field
    inode = meta.split('/r', 1)[1].strip()

    # rewritten (reallocated) entry
    if '(realloc)' in inode:
        if '*' not in inode:
            # not expected to happen: realloc without the deleted marker
            print('bad inode:', entry)
        # a rewritten deleted file is silently skipped
        continue

    # removed file: drop the '*' marker and store it under the deleted prefix
    # (add a `continue` here to skip deleted files altogether)
    if '*' in inode:
        inode = inode.strip('* ')
        filename = os.path.join(out_prefix_for_deleted_files, filename)

    if inode in files_in_dir:
        # not expected to happen
        print('dublicate inode founded', entry)
    else:
        files_in_dir[inode] = filename
# --stats report: the per-category dict first, then every unrecognised
# extension ordered by descending count (ties keep their insertion order).
if args.stats:
    print('\nstats:')
    print(ext_stats)
    by_count = sorted(ext_unknown.items(), key=lambda pair: pair[1], reverse=True)
    for ext, count in by_count:
        print('%s: %s' % (ext, count))
    print()

print('Founded %i items' % len(files_in_dir))
# print(files_in_dir)
# [start, end) ranges that the ddrescue mapfile marks as rescued ('+').
# Values are taken verbatim from the mapfile's pos/size columns (byte offsets
# according to the ddrescue manual); end is start + size.
restored_blocks = []
if args.restore_with_log:
    if not log:
        # stderr + exit status 2 instead of a "successful" exit
        parser.error('--log required for --restore_with_log')
    with open(log, 'r') as mapfile:
        for line in mapfile:
            fields = line.split()
            # data lines are "pos size status"; blank lines, '#' comments
            # and anything with a different field count are ignored
            if len(fields) != 3 or fields[0].startswith('#'):
                continue
            try:
                # base 0 accepts both the 0x... and decimal notations
                start = int(fields[0], 0)
                size = int(fields[1], 0)
            except ValueError:
                # e.g. the "current_pos current_status current_pass" status
                # line, which also has three fields but no numeric size
                continue
            if fields[2] != '+':
                continue
            restored_blocks.append([start, start + size])
# --restore: extract every selected file from the image into ./out with icat.
# With --restore_with_log a file is extracted only if every unit istat lists
# for it lies completely inside a range the ddrescue mapfile marks as rescued.
if args.restore:
    print('Writing files to "out" folder...')
    output_path = 'out'
    # the numbers printed by istat are treated as 4096-byte units
    sector_size = 4096
    os.makedirs(output_path, exist_ok=True)
    last_time_print = 0
    total_files = len(files_in_dir)
    for counter, (inode, filename) in enumerate(files_in_dir.items(), 1):
        # progress message at most once a minute
        if time.time() > last_time_print + 60:
            print('Restoring file %i / %i' % (counter, total_files))
            last_time_print = time.time()

        if args.restore_with_log:
            # istat -f ntfs /mnt/hdd/sdb5.raw 108739-128-1
            command_line = 'istat -f ntfs %s %s' % (image, inode)
            try:
                istat_output = subprocess.check_output(command_line, shell=True)
            except subprocess.CalledProcessError:
                print('subprocess.CalledProcessError', inode, filename)
                continue
            # same lenient decoding as for the fls listing, so one odd name
            # in the istat output cannot abort the whole run
            istat_output = istat_output.decode('utf-8', errors='ignore')

            file_sectors = []
            for istat_line in istat_output.split('\n'):
                istat_line = istat_line.strip()
                # lines starting with a digit are taken to be address lists;
                # stripping first avoids indexing into a whitespace-only line
                if istat_line and istat_line[0].isdigit():
                    file_sectors.extend(int(s) for s in istat_line.split())

            all_sectors_restored = True
            for s in file_sectors:
                first_byte = s * sector_size
                end_byte = first_byte + sector_size  # exclusive
                # end_byte may equal the (exclusive) end of a rescued range
                if not any(start <= first_byte and end_byte <= end
                           for start, end in restored_blocks):
                    all_sectors_restored = False
                    break
            if not all_sectors_restored:
                print('Some sectors missed:', filename)
                continue

        full_filename = os.path.join(output_path, filename)
        # icat -f ntfs /mnt/hdd/sdb5.raw 108739-128-1
        command_line = 'icat -f ntfs %s %s' % (image, inode)
        try:
            os.makedirs(os.path.dirname(full_filename), exist_ok=True)
            # The output file is opened here rather than via a shell
            # redirect, so characters such as '"' or '$' in a recovered
            # file name are never interpreted by the shell.
            with open(full_filename, 'wb') as out_file:
                status = subprocess.call(command_line, shell=True, stdout=out_file)
        except OSError:
            print('error during resting file:', filename)
            continue
        if status != 0:
            # subprocess.call() does not raise on failure; check the status
            print('error during resting file:', filename)
    raise SystemExit(0)
# Collect the addresses istat reports for every selected file into one
# sorted list (optionally dumped to sectors.txt as JSON).
print('Reading allocated sectors...')
sectors = []
total_files = len(files_in_dir)
for counter, (inode, filename) in enumerate(files_in_dir.items(), 1):
    # progress message at most every 5 seconds
    if time.time() > last_time_output + 5:
        last_time_output = time.time()
        print('reading sectors from inodes (%i / %i)' % (counter, total_files))

    # istat -f ntfs /mnt/hdd/sdb5.raw 108739-128-1
    command_line = 'istat -f ntfs %s %s' % (image, inode)
    try:
        istat_output = subprocess.check_output(command_line, shell=True)
    except subprocess.CalledProcessError:
        print('subprocess.CalledProcessError', inode, filename)
        continue
    # lenient decoding, like the fls listing: a strict decode would raise
    # UnicodeDecodeError, which is not caught above and would end the scan
    istat_output = istat_output.decode('utf-8', errors='ignore')

    for line in istat_output.split('\n'):
        # lines whose first character is a digit are taken to be address lists
        if line and line[0].isdigit():
            sectors.extend(int(s) for s in line.split())
sectors.sort()

if args.dump_sectors:
    with open('sectors.txt', 'w') as f:
        json.dump(sectors, f)
# Merge the sorted sector list into [start_mb, end_mb] ranges in decimal
# megabytes. Each sector is padded by 2 MB on both sides, and a sector that
# falls within range_for_union MB of the previous range's end extends that
# range instead of opening a new one.
print('Calculating allocated blocks...')
blocks = []  # mb
sector_size = 4096
# block_size_in_mb = 4
range_for_union = 4  # mb
total_sectors = len(sectors)
for counter, sector in enumerate(sectors, 1):
    # progress message at most every 5 seconds
    if time.time() > last_time_output + 5:
        last_time_output = time.time()
        print('merging sectors (%i / %i)' % (counter, total_sectors))
    sector_in_mb = sector * sector_size / 1000 / 1000
    # sector_in_mib = sector * sector_size / 1024 / 1024
    if blocks and sector_in_mb < blocks[-1][1] + range_for_union:
        blocks[-1][1] = round(sector_in_mb + 2)
    else:
        # clamp at 0: sectors in the first 2 MB would otherwise yield a
        # negative start position, which is not a valid ddrescue -i value
        blocks.append([max(0, round(sector_in_mb - 2)), round(sector_in_mb + 2)])
blocks.sort(key=lambda x: x[0])

if args.dump_blocks:
    with open('blocks.txt', 'w') as f:
        json.dump(blocks, f)
# --generate: print one ddrescue command per merged range. Positions and
# sizes are passed with the MB suffix, matching the decimal-megabyte values
# stored in blocks.
if args.generate:
    # parser.error() writes to stderr and exits with status 2, so a missing
    # option is no longer reported as a successful run
    if not device:
        parser.error('--device required for ddrescue generation')
    if not log:
        parser.error('--log required for ddrescue generation')
    for start_mb, end_mb in blocks:
        print('ddrescue -K 10Mi -vvv -i %iMB -s %iMB -n %s %s %s'
              % (start_mb, end_mb - start_mb, device, image, log))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment