Last active
December 11, 2018 07:24
-
-
Save strayge/076c0dfac32fe242fbf61550da4d705b to your computer and use it in GitHub Desktop.
Recover files from damaged disk using The Sleuth Kit and ddrescue (GNU)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# Script combining The Sleuth Kit and GNU ddrescue.
# It makes it possible to restore only the needed files from a damaged drive.
import argparse | |
import json | |
import os | |
import subprocess | |
import time | |
# Command-line interface. Only --image is mandatory here; --device and --log
# are validated further down by the modes that actually need them.
parser = argparse.ArgumentParser(description='Script for combine The Sleuth Kit and ddrescue (GNU)')
parser.add_argument('--inode', '-i', help='start inode, if not specified - starts from root')
parser.add_argument('--image', '-im', help='/path/to/sdxX.img')
parser.add_argument('--device', '-d', help='/dev/sdxX')
parser.add_argument('--log', '-l', help='/path/to/sdxX.log')
parser.add_argument('--recursive', '-r', action='store_true', help='recursive from selected inode')
parser.add_argument('--restore', '-w', action='store_true', help='restore files to "out" folder')
parser.add_argument('--generate', '-g', action='store_true', help='generate commands for ddrescue')
parser.add_argument('--stats', '-s', action='store_true', help='show statistic by extensions')
parser.add_argument('--dump-sectors', action='store_true', help='save sectors for finded files to sectors.txt')
parser.add_argument('--dump-blocks', action='store_true', help='save blocks for finded files to blocks.txt')
parser.add_argument('--type', '-t', help='filter for extensions: images, docs, archives, video, audio')
parser.add_argument('--follow', action='store_true', help='NOT IMPLEMENTED')
parser.add_argument('--restore_with_log', action='store_true', help='skip files with missed sectors in ddrescure log')
args = parser.parse_args()

# Short aliases used throughout the rest of the script.
device = args.device
image = args.image
log = args.log

if not image:
    # parser.error() prints the usage line and the message to stderr and exits
    # with status 2, so a calling shell script can tell that nothing was done
    # (the previous bare exit() reported success).
    parser.error('--image required')
if args.follow:
    # TODO: run fls with -v, collect the raw_read lines from its output and
    # print ddrescue commands for those blocks. That would let recovery of the
    # filesystem metadata (MFT sectors and similar) start before TSK is able
    # to read the file list at all.
    raise SystemExit('--follow not implemented (hdd permanently died while coding this part =(')

# Build the fls invocation as an argument list rather than a shell string, so
# image paths containing spaces or shell metacharacters are passed through
# untouched.
# Example: fls -f ntfs /mnt/hdd/sdb5.raw 108736-144-7
args.inode = args.inode if args.inode else ''
command_line = ['fls', '-f', 'ntfs', '-p']
if args.recursive:
    command_line.append('-r')
command_line.append(image)
if args.inode:
    # Without a start inode fls is simply called with the image alone.
    command_line.append(args.inode)

print('Reading file list...')
cmd_output = subprocess.check_output(command_line)
# File names on a damaged volume may not be valid UTF-8; drop the bad bytes
# instead of aborting the whole run.
cmd_output = cmd_output.decode('utf-8', errors='ignore')
# Sub-folder name that is prepended to the path of every deleted ('*') entry.
out_prefix_for_deleted_files = '____REMOVED____'
# Timestamp of the most recent progress message (used to throttle output).
last_time_output = 0

print('Parsing file list...')

# Known file categories: category name -> tuple of lower-case suffixes.
# Tuples are used because str.endswith() accepts them directly.
extensions_types = dict(
    images=('.jpeg', '.jpg', '.png', '.tiff', '.gif', '.cr2', '.tif', '.bmp', '.psd', '.crw', '.tga', '.svg'),
    docs=('.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.rtf', '.djvu', '.pdf', '.fb2', '.txt', '.pptm',
          '.xlsm', '.pps', '.ppsx', '.ppsm', '.pot', '.potx', '.odt', '.vsd', '.vsdx'),
    archives=('.zip', '.rar', '.7z'),
    video=('.mov', '.avi', '.mkv', '.mpg', '.mpeg', '.mp4', '.flv', '.3gp', '.3gpp', '.wmv', '.webm'),
    audio=('.mp3', '.ogg', '.wav', '.cue', '.flac', '.wma', '.amr'),
)

# A --type filter must name one of the categories above.
if args.type:
    if args.type not in extensions_types:
        print('Unknown type')
        exit()

# Statistics: hits per known category, and hits per extension that belongs
# to no category.
ext_stats, ext_unknown = {}, {}
# Selected files: inode -> path to write below the output folder.
files_in_dir = {}
# Walk the fls listing and collect inode -> output path for every file entry
# that passes the optional --type filter.
for line in cmd_output.split('\n'):
    # Only file entries are of interest: the text before the first ':' must
    # contain the '/r' marker; everything after that ':' is the path.
    meta, colon, name_part = line.partition(':')
    if not colon or '/r' not in meta:
        continue
    filename = name_part.strip()
    lowered = filename.lower()

    if args.stats:
        for ext_type, suffixes in extensions_types.items():
            if lowered.endswith(suffixes):
                ext_stats[ext_type] = ext_stats.get(ext_type, 0) + 1
                break
        else:
            # No known category matched: count the raw extension, but only
            # when a dot appears within the last 15 characters of the path.
            if '.' in filename[-15:]:
                ext = lowered.split('.')[-1]
                ext_unknown[ext] = ext_unknown.get(ext, 0) + 1

    if args.type:
        # Skip special '$' entries and anything outside the chosen category.
        if filename.startswith('$') or not lowered.endswith(extensions_types[args.type]):
            continue

    # ':' shows up for some NTFS streams (maybe just skip those?); '`' is
    # replaced as a special character.
    filename = filename.replace(':', '_').replace('`', '_')

    inode = meta.split('/r', 1)[1].strip()
    is_deleted = '*' in inode

    if '(realloc)' in inode:
        # A rewritten (reallocated) deleted file is skipped silently; a
        # realloc mark without '*' is not expected, so report it.
        if not is_deleted:
            print('bad inode:', line)
        continue

    if is_deleted:
        inode = inode.strip('* ')
        filename = os.path.join(out_prefix_for_deleted_files, filename)
        # To leave deleted files out entirely, `continue` here instead.

    if inode in files_in_dir:
        # Not expected to happen; the first path seen for an inode is kept.
        print('dublicate inode founded', line)
    else:
        files_in_dir[inode] = filename
# Optional report: per-category totals first, then every uncategorised
# extension ordered from most to least frequent.
if args.stats:
    print('\nstats:')
    print(ext_stats)
    by_frequency = sorted(ext_unknown.items(), key=lambda item: item[1], reverse=True)
    for ext, count in by_frequency:
        print('%s: %s' % (ext, count))
    print()

print('Founded %i items' % len(files_in_dir))
def _read_rescued_ranges(mapfile_path):
    """Return the ranges marked '+' in a ddrescue log (mapfile).

    Each returned item is ``[start, end]`` with ``end = start + size``, taken
    from the "pos size status" data lines. Values are whatever unit the log
    uses (presumably bytes, per the ddrescue mapfile format -- the restore
    code compares them against cluster * 4096).

    Comment lines, blank lines and lines that do not have exactly three
    fields are ignored. So is any three-field line whose first two fields are
    not integers: newer ddrescue versions write a status line
    "current_pos current_status current_pass", which would otherwise make
    int() raise ValueError.
    """
    ranges = []
    with open(mapfile_path, 'r') as mapfile:
        for raw_line in mapfile:
            fields = raw_line.split()
            if len(fields) != 3 or fields[0].startswith('#'):
                continue
            try:
                # Base 0 lets int() accept the 0x... notation used in the log.
                start = int(fields[0], 0)
                size = int(fields[1], 0)
            except ValueError:
                continue
            if fields[2] != '+':
                continue
            ranges.append([start, start + size])
    return ranges


# Ranges already rescued by ddrescue; stays empty unless --restore_with_log.
restored_blocks = []
if args.restore_with_log:
    if not log:
        raise SystemExit('--log required for --restore_with_log')
    restored_blocks = _read_rescued_ranges(log)
def _istat_clusters(image_path, inode):
    """Return the numbers istat prints for *inode* on digit-leading lines.

    Example command: istat -f ntfs /mnt/hdd/sdb5.raw 108739-128-1

    Raises:
        subprocess.CalledProcessError: istat exited with a non-zero status.
    """
    raw = subprocess.check_output(['istat', '-f', 'ntfs', image_path, inode])
    clusters = []
    for row in raw.decode('utf-8', errors='ignore').split('\n'):
        row = row.strip()
        # Whitespace-only rows are skipped (indexing them used to raise).
        if row and row[0].isdigit():
            clusters.extend(int(token) for token in row.split())
    return clusters


def _clusters_rescued(clusters, rescued_ranges, cluster_size=4096):
    """Return True when every cluster lies wholly inside one rescued range.

    A cluster covers [cluster * cluster_size, (cluster + 1) * cluster_size);
    *rescued_ranges* holds [start, end] pairs with an exclusive end, so a
    cluster that finishes exactly at a range's end still counts as rescued.
    """
    for cluster in clusters:
        first = cluster * cluster_size
        end = first + cluster_size
        if not any(lo <= first and end <= hi for lo, hi in rescued_ranges):
            return False
    return True


if args.restore:
    print('Writing files to "out" folder...')
    output_path = 'out'
    os.makedirs(output_path, exist_ok=True)
    last_time_print = 0
    total = len(files_in_dir)
    for counter, (inode, filename) in enumerate(files_in_dir.items(), 1):
        # Progress message at most once a minute.
        if time.time() > last_time_print + 60:
            print('Restoring file %i / %i' % (counter, total))
            last_time_print = time.time()

        if args.restore_with_log:
            # Skip files whose data is not fully present in the image yet.
            try:
                clusters = _istat_clusters(image, inode)
            except subprocess.CalledProcessError:
                print('subprocess.CalledProcessError', inode, filename)
                continue
            if not _clusters_rescued(clusters, restored_blocks):
                print('Some sectors missed:', filename)
                continue

        full_filename = os.path.join(output_path, filename)
        # Example command: icat -f ntfs /mnt/hdd/sdb5.raw 108739-128-1
        # The output file is opened here instead of using a shell redirect,
        # so names containing quotes, '$' or similar characters cannot break
        # (or inject into) a shell command line.
        try:
            os.makedirs(os.path.dirname(full_filename), exist_ok=True)
            with open(full_filename, 'wb') as out_file:
                status = subprocess.call(['icat', '-f', 'ntfs', image, inode], stdout=out_file)
        except OSError:
            print('error during resting file:', filename)
            continue
        if status != 0:
            print('icat exited with status %i:' % status, filename)
    # Restoring is a terminal mode: nothing below this point should run.
    raise SystemExit(0)
# Collect every number istat lists for the selected inodes. The values are
# later multiplied by a 4096-byte unit, i.e. they are treated as cluster
# numbers even though the variable is called "sectors".
print('Reading allocated sectors...')
sectors = []
total = len(files_in_dir)
for counter, (inode, filename) in enumerate(files_in_dir.items(), 1):
    # Progress message at most every five seconds.
    if time.time() > last_time_output + 5:
        last_time_output = time.time()
        print('reading sectors from inodes (%i / %i)' % (counter, total))
    # Example: istat -f ntfs /mnt/hdd/sdb5.raw 108739-128-1
    # Passed as an argument list so unusual image paths need no quoting.
    try:
        cmd_output = subprocess.check_output(['istat', '-f', 'ntfs', image, inode])
    except subprocess.CalledProcessError:
        # Best effort: report the inode and carry on with the others.
        print('subprocess.CalledProcessError', inode, filename)
        continue
    # Undecodable bytes are dropped; only digit-leading lines matter here.
    for line in cmd_output.decode('utf-8', errors='ignore').split('\n'):
        if line and line[0].isdigit():
            sectors.extend(int(s) for s in line.split())

sectors.sort()

if args.dump_sectors:
    with open('sectors.txt', 'w') as f:
        json.dump(sectors, f)
# Turn the sorted list of allocated units into coarse [start_mb, end_mb]
# ranges: each unit is padded by 2 MB on both sides, and a unit that falls
# within range_for_union MB of the previous range's end extends that range
# instead of opening a new one. MB here is decimal (10**6 bytes).
print('Calculating allocated blocks...')
blocks = []  # mb
sector_size = 4096
range_for_union = 4  # mb
counter = 0
for sector in sectors:
    counter += 1
    # Progress message at most every five seconds.
    if time.time() > last_time_output + 5:
        last_time_output = time.time()
        print('merging sectors (%i / %i)' % (counter, len(sectors)))
    sector_in_mb = sector * sector_size / 1000 / 1000
    if blocks and sector_in_mb < blocks[-1][1] + range_for_union:
        blocks[-1][1] = round(sector_in_mb + 2)
    else:
        # Clamp the padded start at 0: data in the first 2 MB used to yield a
        # negative start, i.e. an invalid negative ddrescue input position.
        blocks.append([max(0, round(sector_in_mb - 2)), round(sector_in_mb + 2)])

blocks.sort(key=lambda x: x[0])

if args.dump_blocks:
    with open('blocks.txt', 'w') as f:
        json.dump(blocks, f)
# Print one ddrescue command per computed block (position and size in MB).
if args.generate:
    # Both the source device and the log path are part of every generated
    # command, so stop when either is missing.
    required = (
        (device, '--device required for ddrescue generation'),
        (log, '--log required for ddrescue generation'),
    )
    for value, message in required:
        if not value:
            print(message)
            exit()
    for start_mb, end_mb in blocks:
        print('ddrescue -K 10Mi -vvv -i %iMB -s %iMB -n %s %s %s'
              % (start_mb, end_mb - start_mb, device, image, log))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment