Created
November 15, 2020 07:54
-
-
Save jascotty2/940238522a1decd3dc39be6b64579487 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Copyright (C) 2020 Jacob Scott <jascottytechie@gmail.com>
 *
 * Description: THEOS Disk Image File Extraction
 *
 * This program is free software: you can redistribute it and/or modify it under
 * the terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program. If not, see <http://www.gnu.org/licenses/>.
 */
// ---------------------------------------------------------------------------
// User configuration
// ---------------------------------------------------------------------------

// Path of the raw disk image to read.
$file = 'disk.img';

// Directory that receives the recovered files (and the log file).
$outFolder = 'recovered';

// true: give each library its own folder; false: keep the dotted names.
$librariesAsFolders = true;

// Owner ids whose files are extracted (0 == system files).
$userFiles = [0, 4];

// true: do not extract zero-length files.
$skipEmptyFiles = true;

// true: examine the files but do not extract them.
$dryRun = false;

// true: write the file tables and progress to a log file.
$logResults = true;
$logFile = sprintf('_recovery_%s.log', basename($file));

// The local system timezone is required to get the extracted file dates correct.
date_default_timezone_set('America/Chicago');
///////////////////////////////////////////////////////////////////////////
/////////////////////// Begin Program Logic ///////////////////////
///////////////////////////////////////////////////////////////////////////
// MBR partition type bytes used by THEOS.
const DISK_THEOS_2G = 56;
const DISK_THEOS_SPANNED = 57;
const DISK_THEOS_4G = 58;
const DISK_THEOS_4G_EXTENDED = 59;

// File organisation codes found in the type byte of a file record.
// These are suspected to be bit fields, but the purpose of bit 32 (0x20) is unknown.
const FILE_PROGRAM_1 = 1;
const FILE_INDEXED = 2;
const FILE_RELATIVE = 8;
const FILE_STREAM = 16;
const FILE_PROGRAM = 33;
const FILE_FOLDER = 64;
const FILE_SYS_EXE = 65;  // VIR.CMD386 & SYSTEM.THEOS32 programs
const FILE_LIBRARY = 128;
const FILE_MEMBER = 192;  // member of a library
const FILE_DELETED = 255;

// Display names of the file organisation codes, used for logging.
$types = [
    FILE_PROGRAM_1 => 'PROGRAM',
    FILE_INDEXED   => 'INDEX',
    FILE_RELATIVE  => 'RELATIVE',
    FILE_STREAM    => 'STREAM',
    FILE_PROGRAM   => 'PROGRAM',
    FILE_FOLDER    => 'FOLDER',
    FILE_SYS_EXE   => 'SYS_EXE',
    FILE_LIBRARY   => 'LIBRARY',
    FILE_MEMBER    => 'MEMBER',
    FILE_DELETED   => 'DELETED'
];

// Bits of the permission / attribute byte.
const PERM_UR = 1;
const PERM_UW = 2;
const PERM_UX = 4;
const PERM_UE = 8;
const PERM_OR = 16;
const PERM_OW = 32;
const ATTR_MODIFIED = 64;
const ATTR_VISIBLE = 128;
// Extended ASCII: maps every byte from 0x81 to 0xFF to its UTF-8 replacement.
// Bytes without a symbol map to the empty string. The entries are kept in
// ascending byte order.
// TODO? Option for converting ASCII files to UTF-8 with these translated codes?
$characterReplacements =
    // 129-159 have no symbols mapped to them
    array_fill_keys(array_map('chr', range(0x81, 0x9F)), "")
    + [
        "\xA0" => "\xE2\x94\x8C", // 160 ┌
        "\xA1" => "\xE2\x94\x90", // 161 ┐
        "\xA2" => "\xE2\x94\x98", // 162 ┘
        "\xA3" => "\xE2\x94\x94", // 163 └
        "\xA4" => "\xE2\x94\xBC", // 164 ┼
        "\xA5" => "\xE2\x94\x9C", // 165 ├
        "\xA6" => "\xE2\x94\xA4", // 166 ┤
        "\xA7" => "\xE2\x94\xAC", // 167 ┬
        "\xA8" => "\xE2\x94\xB4", // 168 ┴
        "\xA9" => "\xE2\x94\x80", // 169 ─
        "\xAA" => "\xE2\x94\x82", // 170 │
        "\xAB" => "\xE2\x95\xAD", // 171 ╭
        "\xAC" => "\xE2\x95\xAE", // 172 ╮
        "\xAD" => "\xE2\x95\xAF", // 173 ╯
        "\xAE" => "\xE2\x95\xB0", // 174 ╰
        "\xAF" => "\xE2\x95\x94", // 175 ╔
        "\xB0" => "\xE2\x95\x97", // 176 ╗
        "\xB1" => "\xE2\x95\x9D", // 177 ╝
        "\xB2" => "\xE2\x95\x9A", // 178 ╚
        "\xB3" => "\xE2\x95\xAC", // 179 ╬
        "\xB4" => "\xE2\x95\xA0", // 180 ╠
        "\xB5" => "\xE2\x95\xA3", // 181 ╣
        "\xB6" => "\xE2\x95\xA6", // 182 ╦
        "\xB7" => "\xE2\x95\xA9", // 183 ╩
        "\xB8" => "\xE2\x95\x90", // 184 ═
        "\xB9" => "\xE2\x95\x91", // 185 ║
    ]
    // no symbols again from 186-191
    + array_fill_keys(array_map('chr', range(0xBA, 0xBF)), "")
    + [
        "\xC0" => "\xC3\x84", // 192 Ä
        "\xC1" => "\xC3\xA4", // 193 ä
        "\xC2" => "\xC3\xA2", // 194 â
        "\xC3" => "\xC3\xA0", // 195 à
        "\xC4" => "\xC3\xA1", // 196 á
        "\xC5" => "\xC3\x89", // 197 É
        "\xC6" => "\xC3\xAB", // 198 ë
        "\xC7" => "\xC3\xAA", // 199 ê
        "\xC8" => "\xC3\xA8", // 200 è
        "\xC9" => "\xC3\xA9", // 201 é
        "\xCA" => "\xC3\xAF", // 202 ï
        "\xCB" => "\xC3\xAE", // 203 î
        "\xCC" => "\xC3\xAC", // 204 ì
        "\xCD" => "\xC3\xAD", // 205 í
        "\xCE" => "\xC3\x96", // 206 Ö
        "\xCF" => "\xC3\xB6", // 207 ö
        "\xD0" => "\xC3\xB4", // 208 ô
        "\xD1" => "\xC3\xB2", // 209 ò
        "\xD2" => "\xC3\xB3", // 210 ó
        "\xD3" => "\xC3\x9C", // 211 Ü
        "\xD4" => "\xC3\xBC", // 212 ü
        "\xD5" => "\xC3\xBB", // 213 û
        "\xD6" => "\xC3\xB9", // 214 ù
        "\xD7" => "\xC3\xBA", // 215 ú
        "\xD8" => "\xC3\x87", // 216 Ç
        "\xD9" => "\xC3\xA7", // 217 ç
        "\xDA" => "\xC3\x91", // 218 Ñ
        "\xDB" => "\xC3\xB1", // 219 ñ
        "\xDC" => "\xC3\x86", // 220 Æ
        "\xDD" => "\xC3\xA6", // 221 æ
        "\xDE" => "\xC3\x85", // 222 Å
        "\xDF" => "\xC3\xA5", // 223 å
        "\xE0" => "\xC3\x9F", // 224 ß
        "\xE1" => "\xC2\xBF", // 225 ¿
        "\xE2" => "\xC2\xA1", // 226 ¡
        "\xE3" => "\xC2\xA2", // 227 ¢
        "\xE4" => "\xC2\xA3", // 228 £
        "\xE5" => "\xC2\xA5", // 229 ¥
        "\xE6" => "\xE2\x82\xA7", // 230 ₧ (now-defunct Spanish currency, the peseta)
        "\xE7" => "\xE2\x82\xAC", // 231 €
        "\xE8" => "\xC2\xBC", // 232 ¼
        "\xE9" => "\xC2\xBD", // 233 ½
        "\xEA" => "\xC3\xBF", // 234 ÿ
        "\xEB" => "\xC2\xA7", // 235 §
        "\xEC" => "\xE2\x97\xBC", // 236 ◼
        "\xED" => "\xC2\xB2", // 237 ²
        "\xEE" => "\xC2\xA9", // 238 ©
        "\xEF" => "\xC2\xAE", // 239 ®
    ]
    // no more characters used after 239
    + array_fill_keys(array_map('chr', range(0xF0, 0xFF)), "");
// Open the disk image. It is only ever read, so read-only mode is enough and
// also works for write-protected images.
$fh = fopen($file, 'rb');
if (!$fh) die('failed to open file: '.$file);

// check for the boot sector signature in the last two bytes of the first sector
fseek($fh, 510, SEEK_SET);
$mbr = fread($fh, 2);
if ($mbr !== "\x55\xAA") die('file is not a disk image');

// The output folder is only needed when something is written to it: the log
// file and/or the extracted files.
if ($logResults || !$dryRun) {
    if (!file_exists($outFolder)) {
        // 0777 (restricted by the umask); the owner must be able to write into the folder
        mkdir($outFolder, 0777, true) or die("Failed to create output directory: $outFolder\n");
    } else if (!is_dir($outFolder)) {
        die("Failed to create output directory: $outFolder\n");
    }
}

// $flog stays null (or false when the log cannot be opened) if nothing is
// logged; every later log write is guarded with "$flog &&".
$flog = $logResults ? fopen($outFolder.'/'.$logFile, 'w') : null;
$flog && fputs($flog, "Reading $file\n");
// Locate the THEOS filesystem. Hard disk images carry an MBR partition table;
// floppy images do not, in which case the filesystem starts at offset 0.
$partitionStart = 0;
$partitions = read_mbr($fh);
$partitionCount = count($partitions);
// just guessing that a hard disk (an image with partition data) is a system disk
$systemDisk = $partitionCount != 0;
if ($systemDisk) {
    // MBR with partition data
    $partitionStart = $partitions[0]['offset'];
    foreach ($partitions as $part) {
        $notice = "Disk partition detected: {$part['type-name']} at {$part['offset']} with {$part['sectors']} sectors\n";
        echo $notice;
        $flog && fputs($flog, $notice);
    }
    if ($partitionCount != 1) {
        // (The author's disk only has one partition)
        echo "Running recovery for first partition only\n";
        $flog && fputs($flog, "Running recovery for first partition only\n");
    }
    //$nullPad = "\xE6\x6E"; // hard drive uses a different fill byte pattern
} else {
    // floppy disks don't have a partition table
    //$nullPad = "\xE5";
}

// The partition definition record sits 1024 bytes into the partition.
$partitionRecordPos = 1024 + $partitionStart;
fseek($fh, $partitionRecordPos, SEEK_SET);
$partitionData = read_partitionrec($fh);
if ($partitionData['indexTableSize'] == 0) die ('Invalid Partition Definition');
$indexFiles = $partitionData['indexTableSize'];

echo "Reading disk image partition {$partitionData['label']} from offset $partitionStart\n";
// The file records are read from 256 bytes past the partition record, so that
// is the position reported as their start.
$flog && fputs($flog, "Disk Label: {$partitionData['label']}\n".
    "{$partitionData['cylinders']} cylinders, {$partitionData['heads']} heads, {$partitionData['sectors']} sectors\n".
    //"Using ".(strtoupper(bin2hex($nullPad)))." as NULL\n".
    "Loading $indexFiles file records starting at ".($partitionRecordPos + 256)."\n");
// Build the file table, beginning with the records of the main index.
// The main index is read from 256 bytes past the start of the partition definition.
fseek($fh, 1024 + $partitionStart + 256, SEEK_SET);
$fs_files = [];
$slot = 0;
while ($slot < $indexFiles) {
    ++$slot;
    $rec = read_filerec($fh);
    if ($rec === null) {
        continue;
    }
    // libraries are extracted into their own directory
    if ($librariesAsFolders && $rec['type'] == FILE_LIBRARY) {
        $rec['name'] = str_replace('.', '/', $rec['name']);
    }
    $fs_files[] = $rec;
}
$mainFound = count($fs_files);

// After the index come 1 or 2 blocks of data. From the documentation these seem
// to be a free space map block plus an optional bad sector map block
// (1 block on a floppy, 2 on a disk; the 2nd segment on disk isn't filled with any data after #11).
// They appear to be organized in 10-byte segments.
// Per System Reference pg 751 detailing the File System, the layout of these blocks
// and of the disk volume label block is described in a VLB.H file that is included
// when the system has the "THEOS C" addon (not included in the author's copy).

// Pull in the extra fragment lists of the main-index files that have them.
for ($n = 0; $n < $mainFound; ++$n) {
    $hasExtension = $fs_files[$n]['extended1'] != 0 || $fs_files[$n]['extended2'] != 0;
    if ($hasExtension) {
        read_filerecExtended($fh, $partitionStart, $fs_files[$n]);
    }
}
$flog && fputs($flog, "$mainFound Files loaded from main directory index\n");

// Folders & library files use a trick to store more files: the 'file' itself is
// a binary stream in the filesystem's record format. Only the first $mainFound
// entries are visited here, because load_subfiles() appends to $fs_files.
for ($n = 0; $n < $mainFound; ++$n) {
    $kind = $fs_files[$n]['type'];
    if ($kind == FILE_LIBRARY || $kind == FILE_FOLDER) {
        load_subfiles($fh, $partitionStart, $fs_files[$n], $fs_files, $librariesAsFolders);
    }
}
$flog && fputs($flog, (count($fs_files) - $mainFound)." Files loaded from folders and libraries\n");
// Write the user list (system disks only) and the file tables to the log.
if ($flog) {
    $users = !$systemDisk ? null : load_users($fh, $fs_files, $partitionStart);
    if ($systemDisk) {
        fputs($flog, "\nSystem Users:\n");
        if (is_array($users)) {
            useridsort($users);
            fputs($flog, "┌────────┬──┬────┬────┬────────────────┬────────────────┬────────────────┬────────────────────────┐\n");
            fputs($flog, "│Username│ID│Priv│Mail│ Path │ Prompt │ Library │ Password Bytes │\n");
            fputs($flog, "├────────┼──┼────┼────┼────────────────┼────────────────┼────────────────┼────────────────────────┤\n");
            foreach ($users as $user) {
                fputs($flog, sprintf("│%-8s│%2s│%4d│%4s│%-16s│%-16s│%-16s│%24s│\n",
                    $user['NAME'] ?? '', explode(' ', $user['ID'] ?? '')[0], $user['PRIV'] ?? 0, $user['MAIL'] ?? 'NO',
                    $user['PATH'] ?? '', $user['PROMPT'] ?? '', $user['LIBRARY'] ?? '',
                    isset($user['PASSWORD']) ? strtoupper(implode(' ', str_split(bin2hex($user['PASSWORD']), 2))) : ''));
            }
            fputs($flog, "└────────┴──┴────┴────┴────────────────┴────────────────┴────────────────┴────────────────────────┘\n");
        } else {
            // load_users() returns null when the account file cannot be read completely
            fputs($flog, "(account file could not be read)\n");
        }
    }

    // Frame of the file tables; shared by the main table and every folder/library table.
    $tableTop    = "┌────────┬────────┬────────┬─┬────────┬────────┬─────┬─────────┬────────┬────┬────┬────────┬────┬──────────┬────┬──────────┐\n";
    $tableHead   = "│Filename│Filetype│Membname│:│ User │ Date │Time │ Org. │ Size │Recl│Keyl│ Perms │Grow│ Position │Frag│Index Pos │\n";
    $tableRule   = "├────────┼────────┼────────┼─┼────────┼────────┼─────┼─────────┼────────┼────┼────┼────────┼────┼──────────┼────┼──────────┤\n";
    $tableBottom = "└────────┴────────┴────────┴─┴────────┴────────┴─────┴─────────┴────────┴────┴────┴────────┴────┴──────────┴────┴──────────┘\n";

    // Returns [record length, key length] as they are shown for the entry's file type.
    $describeLengths = function ($entry) {
        $recl = $keyl = '';
        if ($entry['type'] != FILE_DELETED && $entry['type'] & 1) {
            // types with bit 1 set: show both values as 4-digit hex
            $recl = sprintf("%04s", strtoupper(dechex($entry['reclen'])));
            $keyl = sprintf("%04s", strtoupper(dechex($entry['keylen'])));
        } else if ($entry['type'] == FILE_RELATIVE) {
            $recl = $entry['reclen'];
        } else if ($entry['type'] == FILE_LIBRARY) {
            // number of 64-byte records the library index holds
            $recl = $entry['filesize'] / 64;
        } else if ($entry['type'] == FILE_INDEXED) {
            $recl = $entry['reclen'];
            $keyl = $entry['keylen'];
        }
        return [$recl, $keyl];
    };

    // Writes one table row. $name1/$name2/$member fill the first three columns.
    $writeRow = function ($name1, $name2, $member, $entry) use ($flog, $users, $types, $partitionStart, $describeLengths) {
        list($recl, $keyl) = $describeLengths($entry);
        fputs($flog, sprintf("│%-8s│%-8s│%-8s│%s│%-8s│%02d/%02d/%02d│%02d:%02d│%-9s│%8d│%4s│%4s│%8s│%4s│%10d│%4s│%10d│\n",
            $name1, $name2, $member,
            $partitionStart == 0 ? 'F' : 'S', // just simulating the drive letter :)
            $users == null ? sprintf("%8s", $entry['user']) : (isset($users[$entry['user']]) ? $users[$entry['user']]['NAME'] : '??'),
            $entry['modified_month'], $entry['modified_day'], substr($entry['modified_year'], 2),
            $entry['modified_hour'], $entry['modified_minute'],
            $types[$entry['type']] ?? (string) $entry['type'],
            $entry['filesize'],
            $recl, $keyl,
            theosPerms($entry), $entry['grow'],
            $entry['offset'] + $partitionStart, count($entry['fileparts']), $entry['def_pos']));
    };

    // Main index table. The main-index records are the first $mainFound entries of $fs_files.
    fputs($flog, "\nMain File Table, $mainFound / $indexFiles Files:\n");
    fputs($flog, $tableTop);
    fputs($flog, $tableHead);
    fputs($flog, $tableRule);
    $main = array_slice($fs_files, 0, $mainFound);
    filesort($main);
    // folders and libraries still waiting for a table of their own
    $extraFiles = [];
    foreach ($main as $entry) {
        if ($entry['type'] == FILE_LIBRARY || $entry['type'] == FILE_FOLDER) {
            $extraFiles[] = $entry;
        }
        $writeRow($entry['name1'], $entry['name2'], '', $entry);
    }
    fputs($flog, $tableBottom);

    // One table per folder/library. The queue grows while it is processed,
    // because members can themselves be folders or libraries.
    for ($i = 0; $i < count($extraFiles); ++$i) {
        $mainfile = $extraFiles[$i];
        $count = $mainfile['count'] ?? 0;
        $max = $mainfile['filesize'] / 64;
        fputs($flog, "\n{$mainfile['name']} File Table, $count / $max Files:\n");
        fputs($flog, $tableTop);
        fputs($flog, $tableHead);
        fputs($flog, $tableRule);
        if ($count > 0) {
            $index = $mainfile['members'];
            sort($index);
            foreach ($index as $entry) {
                if ($entry['type'] == FILE_LIBRARY || $entry['type'] == FILE_FOLDER) {
                    $extraFiles[] = $entry;
                }
                $writeRow($mainfile['name1'], $mainfile['name2'], $entry['name1'], $entry);
            }
        }
        fputs($flog, $tableBottom);
    }
}
// UTC offset of the configured timezone in "+hhmm" form (e.g. "-0600");
// write_filetime() appends it to every timestamp. date('O') also yields the
// right value for zones that are not a whole number of hours from UTC.
$utc_offset = date('O');

if (!$dryRun) {
    // Create the folders for the extracted data. Directories are created
    // recursively: records of nested folders can appear in $fs_files before
    // the record of their parent folder.
    foreach ($fs_files as $file) {
        // libraries extracted as folders: make sure the library's parent directory exists
        if ($librariesAsFolders && $file['type'] == FILE_LIBRARY) {
            $slash = strrpos($file['name'], '/');
            if ($slash !== false) {
                $dir = $outFolder.'/'.substr($file['name'], 0, $slash);
                if (!file_exists($dir)) {
                    mkdir($dir, 0777, true);
                    write_filetime($dir, $file, $utc_offset);
                }
            }
        }
        // the folder / library itself
        if ($file['type'] == FILE_FOLDER || $file['type'] == FILE_LIBRARY) {
            $dir = $outFolder.'/'.$file['name'];
            if (!file_exists($dir)) {
                mkdir($dir, 0777, true);
                write_filetime($dir, $file, $utc_offset);
            }
        }
    }
}
// Extract the selected files.
// $saved is a set (name => true) of the names already handled, so that a
// name occurring twice is only extracted once.
$saved = [];
foreach ($fs_files as $file) {
    // check the owner against the configured user list (an empty list selects everyone)
    if ($userFiles != null && !in_array($file['user'], $userFiles)) {
        //$flog && fputs($flog, "Skipping {$file['name']} (Not in users list: {$file['user']})\n");
        echo "Skipping {$file['name']} (Not in users list: {$file['user']})\n";
        continue;
    }
    if ($file['type'] == FILE_DELETED) {
        //$flog && fputs($flog, "Skipping {$file['name']} (Deleted File)\n");
        echo "Skipping {$file['name']} (Deleted File)\n";
        continue;
    }
    if (isset($saved[$file['name']])) {
        //$flog && fputs($flog, "Skipping {$file['name']} (Duplicate File)\n");
        echo "Skipping {$file['name']} (Duplicate File)\n";
        continue;
    }
    if ($skipEmptyFiles && $file['filesize'] == 0) {
        //$flog && fputs($flog, "Skipping {$file['name']} (0-Length File)\n");
        echo "Skipping {$file['name']} (0-Length File)\n";
        continue;
    }
    $saved[$file['name']] = true;

    // folders and libraries are directories, not files to extract
    if ($file['type'] == FILE_LIBRARY || $file['type'] == FILE_FOLDER) {
        continue;
    }

    $type = $types[$file['type']] ?? (string) $file['type'];
    $fn = $outFolder.'/'.$file['name'];

    if ($file['allocated'] == 0) {
        echo "Empty File {$file['name']}\n";
        $flog && fputs($flog, "Empty File {$file['name']}\n");
        // a dry run must not create anything
        if (!$dryRun) {
            touch($fn);
            write_filetime($fn, $file, $utc_offset);
        }
        continue;
    }

    $flog && fputs($flog, sprintf("Extracting: %-30s %12s %10u < %9u b %5u r %5u k (%12u)\n",
        $file['name'], $type, $file['offset'] + $partitionStart, $file['filesize'], $file['reclen'], $file['keylen'], $file['def_pos']));

    // read_file() returns null when it has no data at all
    $bytes = read_file($fh, $file, $partitionStart);
    if ($bytes === null) {
        $bytes = '';
    }
    if (strlen($bytes) < $file['filesize']) {
        echo "\n!!!! BAD FILE - Missing data for {$file['name']} !!!!\n\n";
        $flog && fputs($flog, "\n!!!! BAD FILE - Missing data for {$file['name']} !!!!\n\n");
        continue;
    }

    $sections = count($file['fileparts']);
    echo "Extracting {$file['name']}: {$file['allocated']} allocated bytes, {$file['filesize']} bytes long, $sections parts\n";
    if (!$dryRun) {
        $f = fopen($fn, 'wb');
        if (!$f) {
            echo "Failed to write $fn\n";
            $flog && fputs($flog, "Failed to write $fn\n");
            continue;
        }
        fwrite($f, $bytes);
        fclose($f);
        write_filetime($fn, $file, $utc_offset);
    }
}

// All done!
fclose($fh);
$flog && fclose($flog);
exit;
//////////////////////////////////////////////////////////////////////////
///////////////////// Begin Function Definitions /////////////////////
//////////////////////////////////////////////////////////////////////////
/*
File User modified File Rec Key Alloc Block Alloc Block Alloc Block Alloc Block Alloc Block Extra1 Extra2 Growth
File/Member Name File Extension Typ|Owner|Perm|date time| | Size | |Len| |Len| |Byt| |Offset| | #2| |Off #2| | #3| |Off #3| | #4| |Off #4| | #5| |Off #5| |Offset| |Offset| Percentage
|---------------------| |---------------------|\-|\----|\--/|---| |---| |---------| |---| |---| |---| |------| |---| |------| |---| |------| |---| |------| |---| |------| |------| |------| |-/
41 43 43 4F 55 4E 54 20 00 00 00 00 00 00 00 00 10 00 00 7F 6A AE 50 1F CD 13 00 00 00 00 00 00 01 00 5A 9A 00 10 00 59 CE 00 01 00 7A 1D 1A 01 00 A5 D2 00 01 00 EF 26 06 00 00 00 00 00 00 00
*/
/**
 * Loads the members of a folder or library and appends them to $records.
 *
 * Per the documentation, folder indexes are streams and may grow like any
 * other file, so the index is loaded as a (possibly fragmented) file rather
 * than as one linear run of the disk. Folders may be nested; nested folders
 * and libraries are loaded recursively.
 *
 * @param resource $fh                 handle of the disk image
 * @param int      $partitionStart     byte offset of the partition in the image
 * @param array    $file               record of the folder/library; receives the
 *                                     keys 'count' and 'members'
 * @param array    $records            flat list of all file records; the members
 *                                     found (at every depth) are appended
 * @param bool     $librariesAsFolders true to turn the dot in library names into '/'
 */
function load_subfiles($fh, $partitionStart, &$file, &$records, $librariesAsFolders) {
    // first load in the index (read_file() limits it to the recorded file size)
    $index = read_file($fh, $file, $partitionStart);
    $indexLength = $index === null ? 0 : strlen($index);

    // Track which fragment the current index position lies in, so that
    // 'def_pos' can point at the record's position in the disk image.
    $parts = $file['fileparts'];
    $shard = 0;
    $shardStart = 0; // index position at which the current fragment begins
    $shardEnd = isset($parts[0]) ? $parts[0]['allocated'] : 0;
    $filePos = (isset($parts[0]) ? $parts[0]['offset'] : 0) + $partitionStart;

    $files = [];
    // the 'file' is the file table to load, 64 bytes at a time; walk the whole
    // index, not just its first fragment
    for ($indexPos = 0; $indexPos < $indexLength; $indexPos += 64) {
        // move on to the fragment that contains this position
        while ($indexPos >= $shardEnd && isset($parts[$shard + 1])) {
            ++$shard;
            $shardStart = $shardEnd;
            $shardEnd += $parts[$shard]['allocated'];
            $filePos = $parts[$shard]['offset'] + $partitionStart;
        }

        // read the file record; null means there is no record at this position
        $rec = read_filerec_file($index, $indexPos);
        if ($rec === null) {
            continue;
        }
        $rec['name'] = $file['name'].'/'.$rec['name'];
        $rec['def_pos'] = $filePos + ($indexPos - $shardStart);
        if ($librariesAsFolders && $rec['type'] == FILE_LIBRARY) {
            $rec['name'] = str_replace('.', '/', $rec['name']);
        }
        if ($rec['extended1'] != 0 || $rec['extended2'] != 0) {
            read_filerecExtended($fh, $partitionStart, $rec);
        }
        $files[] = $rec;
    }
    $file['count'] = count($files);

    // now run recursive:
    for ($i = 0; $i < $file['count']; ++$i) {
        if ($files[$i]['type'] == FILE_LIBRARY || $files[$i]['type'] == FILE_FOLDER) {
            load_subfiles($fh, $partitionStart, $files[$i], $records, $librariesAsFolders);
        }
    }

    // save library/folder members:
    $file['members'] = $files;
    // append to the main array for easy extraction
    $records = array_merge($records, $files);
}
/**
 * Reads the next 64-byte file record from the disk image.
 *
 * @param resource $fh handle of the disk image, positioned at a record
 * @return array|null the decoded record with its image position in 'def_pos',
 *                    or null when the slot has no name or fewer than 64 bytes
 *                    could be read (end of the image)
 */
function read_filerec($fh) {
    $attr = fread($fh, 64);
    // a short read cannot hold a complete record
    if ($attr === false || strlen($attr) < 64) {
        return null;
    }
    $rec = read_filerec_file($attr, 0);
    if ($rec == null) {
        return null;
    }
    // the handle now sits just behind the record
    return array_merge($rec, ['def_pos' => ftell($fh) - 64]);
}
/**
 * Decodes one 64-byte file record found at $offset inside $attr.
 *
 * Layout: 8 bytes name, 8 bytes extension, then 48 bytes of attributes.
 * All multi-byte numbers are little-endian, except the date.
 *
 * @param string $attr   buffer containing the record (taken by reference to avoid a copy)
 * @param int    $offset position of the record inside the buffer
 * @return array|null the decoded record, or null when the name field is blank
 */
function read_filerec_file(&$attr, $offset) {
    // both name fields have '$' replaced by '_'
    $name1 = trim(str_replace('$', '_', substr($attr, $offset, 8)));
    if ($name1 == '') {
        return null;
    }
    $name2 = trim(str_replace('$', '_', substr($attr, $offset + 8, 8)));
    $name = $name2 == '' ? $name1 : $name1.'.'.$name2;

    // the attributes start behind the two name fields
    $base = $offset + 16;

    // Dates are stored in a bit stream; this is the *one* value in the index
    // table that is stored big-endian. From the highest bit down:
    // 6 bits years since 1986, 4 bits month, 5 bits day, 5 bits hour,
    // 6 bits minute, 6 bits second.
    $stamp = hexDecE($attr, $base + 4, 4, true);

    // reserved blocks and the block offsets for each file part
    $parts = getParts($attr, $base + 16, 5);

    // growth % is stored as a float in two nibble integers
    $growByte = hexDecE($attr, $base + 47, 1, false);

    return [
        'name' => $name,
        'name1' => $name1,
        'name2' => $name2,
        // file type is the first attribute byte
        'type' => hexDecE($attr, $base, 1, false),
        // file owner
        'user' => hexDecE($attr, $base + 1, 2, false),
        // record & key size (indexes + libraries + relative)
        'reclen' => hexDecE($attr, $base + 12, 2, false),
        'keylen' => hexDecE($attr, $base + 14, 2, false),
        // max value of 1798134720 (6B 2D 5B C0) according to the documentation
        'filesize' => hexDecE($attr, $base + 8, 4, false),
        'allocated' => $parts[0]['allocated'] ?? 0,
        'offset' => $parts[0]['offset'] ?? 0,
        'fileparts' => $parts,
        // Up to 107 fragments are supported by the filesystem; the extra ones are
        // located in two optional 256-byte blocks, addressed in 256-byte units.
        'extended1' => hexDecE($attr, $base + 41, 3, false) * 256,
        'extended2' => hexDecE($attr, $base + 44, 3, false) * 256,
        // file permission flags
        'perms' => hexDecE($attr, $base + 3, 1, false),
        'grow' => (($growByte & 0xF0) >> 4).'.'.($growByte & 0x0F),
        'modified_year' => 1986 + (($stamp >> 26) & 0x3F),
        'modified_month' => ($stamp >> 22) & 0x0F,
        'modified_day' => ($stamp >> 17) & 0x1F,
        'modified_hour' => ($stamp >> 12) & 0x1F,
        'modified_minute' => ($stamp >> 6) & 0x3F,
        'modified_second' => $stamp & 0x3F,
    ];
}
/**
 * Appends the fragments listed in a file's extension blocks to its 'fileparts'.
 *
 * A record can point at up to two 256-byte extension blocks ('extended1' and
 * 'extended2', 0 when absent), each holding up to 51 further fragment entries.
 *
 * @param resource $fh             handle of the disk image
 * @param int      $partitionStart byte offset of the partition in the image
 * @param array    $file           file record, updated in place
 */
function read_filerecExtended($fh, $partitionStart, &$file) {
    foreach (['extended1', 'extended2'] as $field) {
        $blockPos = $file[$field];
        if ($blockPos == 0) {
            continue;
        }
        fseek($fh, $blockPos + $partitionStart, SEEK_SET);
        $block = fread($fh, 256);
        $file['fileparts'] = array_merge($file['fileparts'], getParts($block, 0, 51));
    }
}
/**
 * Decodes a run of 5-byte fragment entries.
 *
 * Each entry holds a 2-byte length and a 3-byte offset, both little-endian
 * and counted in 256-byte blocks. Entries whose offset is 0 are left out.
 *
 * @param string $attr  buffer holding the entries (taken by reference to avoid a copy)
 * @param int    $start position of the first entry
 * @param int    $max   number of entries to examine
 * @return array list of ['allocated' => bytes, 'offset' => bytes from the partition start]
 */
function getParts(&$attr, $start, $max) {
    $found = [];
    $end = $start + 5 * $max;
    for ($pos = $start; $pos < $end; $pos += 5) {
        // absolute offset, relative to the start of the disk partition
        $blockOffset = hexDecE($attr, $pos + 2, 3, false) * 256;
        if ($blockOffset == 0) {
            continue;
        }
        $found[] = [
            // length of the reserved blocks
            'allocated' => hexDecE($attr, $pos, 2, false) * 256,
            'offset' => $blockOffset,
        ];
    }
    return $found;
}
/**
 * Reads the content of a file by joining its fragments.
 *
 * @param resource $fh             handle of the disk image
 * @param array    $file           file record; uses 'fileparts' and 'filesize'
 * @param int      $partitionStart byte offset of the partition in the image
 * @return string|null the first 'filesize' bytes of the joined fragments, or
 *                     null when no data was read at all
 */
function read_file($fh, $file, $partitionStart) {
    $chunks = [];
    foreach ($file['fileparts'] as $fragment) {
        // nothing is reserved for this fragment
        if ($fragment['allocated'] == 0) {
            continue;
        }
        fseek($fh, $fragment['offset'] + $partitionStart, SEEK_SET);
        $chunks[] = fread($fh, $fragment['allocated']);
    }

    $joined = implode('', $chunks);
    if ($joined == null) {
        return null;
    }
    // the reserved space may be larger than the file itself
    return substr($joined, 0, $file['filesize']);
}
/**
 * Reads the 64-byte partition definition at the current position of $fh.
 *
 * Only part of the record is understood; the restore-related fields are guesses.
 *
 * @param resource $fh handle of the disk image, positioned at the record
 * @return array label, size of the main index (in file records), disk geometry,
 *               the guessed restore fields and the raw record as hex ('raw')
 */
function read_partitionrec($fh) {
    $attr = fread($fh, 64);

    // also a guess: a big-endian bit field with 6 bits year, 4 bits month and
    // 5 bits day, starting at the second-highest bit
    $restoreStamp = hexDecE($attr, 17, 4, true);

    // no idea how to read the rest of the entry..
    return [
        'label' => theosStr(trim(substr($attr, 0, 8))),
        // the allocation is a number of 256-byte blocks, which store 4 records each
        'indexTableSize' => hexDecE($attr, 20, 2, false) * 4,
        'cylinders' => hexDecE($attr, 22, 2, false),
        'heads' => hexDecE($attr, 24, 1, false),
        'sectors' => hexDecE($attr, 25, 1, false),
        'restoreMedia' => hexDecE($attr, 8, 1, false), // just a guess
        'restoreLabel' => theosStr(trim(substr($attr, 9, 8))),
        'restore_year' => 1995 + (($restoreStamp >> 25) & 0x3F),
        'restore_month' => ($restoreStamp >> 21) & 0x0F,
        'restore_day' => ($restoreStamp >> 16) & 0x1F,
        'raw' => bin2hex($attr)
    ];
}
/**
 * Reads the four partition entries of a generic MBR.
 *
 * @param resource $fh handle of the disk image
 * @return array the entries that read_mbr_partition() recognised, in table order
 */
function read_mbr($fh) {
    // the partition table follows the 446 bytes of bootstrap code
    fseek($fh, 446, SEEK_SET);
    $found = [];
    $remaining = 4;
    while ($remaining-- > 0) {
        $entry = read_mbr_partition($fh);
        if ($entry == null) {
            continue;
        }
        $found[] = $entry;
    }
    return $found;
}
/**
 * Reads one 16-byte MBR partition entry from the current position of $fh.
 *
 * @param resource $fh handle of the disk image, positioned at the entry
 * @return array|null the decoded entry, or null when the status byte is
 *                    neither 0x80 nor 0x00 or the type is not a THEOS type
 */
function read_mbr_partition($fh) {
    $rec = fread($fh, 16);

    // The first byte must be 0x80 (active) or 0x00 (inactive); anything else
    // is not one of the older entry types and is ignored.
    $flag = substr($rec, 0, 1);
    if ($flag != "\x80" && $flag != "\x00") {
        return null;
    }

    // we are only looking for THEOS disks
    $typeNames = [
        DISK_THEOS_2G          => 'THEOS v3.2 2G',
        DISK_THEOS_SPANNED     => 'THEOS v4 Spanned',
        DISK_THEOS_4G          => 'THEOS v4 4G',
        DISK_THEOS_4G_EXTENDED => 'THEOS v4 4G Extended',
    ];
    $type = hexDecE($rec, 4, 1, false);
    if (!isset($typeNames[$type])) {
        return null;
    }

    // CHS address of the first sector; the sector byte carries the 2 high bits
    // of the cylinder number
    $startSector = hexDecE($rec, 2, 1, false);
    $startCylinder = hexDecE($rec, 3, 1, false) | (($startSector & 192) << 2);

    // CHS address of the last sector (probably all FF for a single partition disk)
    $endSector = hexDecE($rec, 6, 1, false);
    $endCylinder = hexDecE($rec, 7, 1, false) | (($endSector & 192) << 2);

    return [
        'chs-start-head' => hexDecE($rec, 1, 1, false),
        'chs-start-sector' => $startSector & 63,
        'chs-start-cylinder' => $startCylinder,
        'chs-end-head' => hexDecE($rec, 5, 1, false),
        'chs-end-sector' => $endSector & 63,
        'chs-end-cylinder' => $endCylinder,
        // logical block of the first sector (little-endian), converted to bytes
        'offset' => hexDecE($rec, 8, 4, false) * 512,
        // number of sectors in the partition
        'sectors' => hexDecE($rec, 12, 4, false),
        'type' => $type,
        'type-name' => $typeNames[$type]
    ];
}
/**
 * Loads the user accounts from the file SYSTEM/THEOS32/ACCOUNT.
 *
 * The SYSTEM.THEOS32.ACCOUNT v4 format is text with lines separated by CR:
 * a line "ACCOUNT=<name>" starts an account and each following line
 * "<TAB>KEY=value" adds one attribute to it. (An earlier attempt at the v3
 * SYSTEM.ACCOUNT format, using fixed 151-byte records, did not load system
 * accounts properly and is not implemented.)
 *
 * @param resource $fh             handle of the disk image
 * @param array    $fs_files       all known file records
 * @param int      $partitionStart byte offset of the partition in the image
 * @return array|null accounts keyed by their ID (a repeated ID gets
 *                    " <number of accounts so far>" appended); an empty array
 *                    when the account file is not among $fs_files; null when
 *                    the file cannot be read completely
 */
function load_users($fh, $fs_files, $partitionStart) {
    $users = [];

    // Files a finished account under its ID. (IDs are not unique.)
    $store = function ($account) use (&$users) {
        $id = $account['ID'];
        if (isset($users[$id])) {
            $users[$id.' '.count($users)] = $account;
        } else {
            $users[$id] = $account;
        }
    };

    foreach ($fs_files as $file) {
        if ($file['name'] != 'SYSTEM/THEOS32/ACCOUNT') {
            continue;
        }
        $bytes = read_file($fh, $file, $partitionStart);
        if ($bytes == null || strlen($bytes) < $file['filesize']) {
            return null;
        }

        $user = null;
        foreach (explode(chr(13), $bytes) as $line) {
            if (substr($line, 0, 8) == 'ACCOUNT=') {
                // a new account begins: file the previous one first
                if ($user != null) {
                    $store($user);
                }
                $user = ['NAME' => substr($line, 8)];
            } else if (substr($line, 0, 1) == chr(9) && ($eq = strpos($line, '=')) !== false) {
                $user[substr($line, 1, $eq - 1)] = substr($line, $eq + 1);
            }
        }
        // the last account has no following ACCOUNT= line
        if ($user != null) {
            $store($user);
        }
        break;
    }
    return $users;
}
/**
 * Reads an unsigned number of $length bytes from a string or an array of bytes.
 *
 * @param string|array $input     buffer to read from (taken by reference to avoid a copy)
 * @param int          $start     position of the first byte
 * @param int          $length    number of bytes to read
 * @param bool         $bigEndian true if the most significant byte comes first
 * @return int|float the value as returned by hexdec(); 0 when no bytes are available
 */
function hexDecE(&$input, $start, $length, $bigEndian) {
    if (is_array($input)) {
        $slice = array_slice($input, $start, $length);
        $raw = implode($bigEndian ? $slice : array_reverse($slice));
    } else {
        $raw = substr($input, $start, $length);
        if (!$bigEndian) {
            // put the most significant byte first
            $raw = strrev($raw);
        }
    }
    return hexdec(bin2hex($raw));
}
/**
 * Renders a string as binary digits, for debugging.
 *
 * @param string $str bytes to render
 * @return string one group of 8 binary digits per byte, groups separated by a space
 */
function strToBin($str) {
    $toBits = function ($byte) {
        return sprintf('%08b', ord($byte));
    };
    return implode(' ', array_map($toBits, str_split($str)));
}
/**
 * Formats the permission byte of a file record as an 8-character string.
 *
 * This mostly follows the THEOS file permission print structure, but THEOS
 * uses the first position for both M and H; here they get a position each.
 *
 * @param array $file file record; uses 'perms'
 * @return string one character per flag, or '' when the record has no 'perms'
 */
function theosPerms($file) {
    if (!isset($file['perms'])) {
        return '';
    }
    $bits = $file['perms'];

    // [mask, character when the bit is set, character when it is clear], in output order
    $layout = [
        [ATTR_VISIBLE,  '.', 'H'],
        [ATTR_MODIFIED, 'M', '.'],
        [PERM_OW,       'W', '.'],
        [PERM_OR,       'R', '.'],
        [PERM_UE,       'E', '.'],
        [PERM_UX,       'X', '.'],
        [PERM_UW,       'W', '.'],
        [PERM_UR,       'R', '.'],
    ];

    $out = '';
    foreach ($layout as $column) {
        list($mask, $whenSet, $whenClear) = $column;
        $out .= ($bits & $mask) ? $whenSet : $whenClear;
    }
    return $out;
}
/**
 * Translates the extended-ASCII bytes of a THEOS string using the global
 * $characterReplacements table.
 *
 * strtr() is used because it translates the input in a single pass.
 * str_replace() applies the pairs one after another, so bytes emitted for an
 * earlier pair (for example the \xC3 or \xE2 lead bytes of the replacements)
 * would be translated again by later pairs and corrupt the result.
 *
 * @param string $str raw string from the disk image
 * @return string the string with every byte listed in the table replaced
 */
function theosStr($str) {
    global $characterReplacements;
    return strtr($str, $characterReplacements);
}
/**
 * Sets the modification time of an extracted file or folder to the time
 * stored in its file record.
 *
 * @param string $filename   path of the extracted file or folder
 * @param array  $file       file record; uses the 'modified_*' fields
 * @param string $utc_offset UTC offset appended to the timestamp, e.g. "-0600"
 */
function write_filetime($filename, $file, $utc_offset) {
    $stamp = strtotime(sprintf('%04d-%02d-%02dT%02d:%02d:%02d%s',
        $file['modified_year'], $file['modified_month'], $file['modified_day'],
        $file['modified_hour'], $file['modified_minute'], $file['modified_second'],
        $utc_offset));
    // The bit fields can hold values that are no valid date (e.g. month 15).
    // In that case keep the current time instead of passing false to touch(),
    // which would set the time to the epoch.
    if ($stamp === false) {
        return;
    }
    touch($filename, $stamp);
}
/**
 * Sorts file records by name in place, keeping their keys.
 *
 * @param array $array file records to sort
 */
function filesort(&$array) {
    uasort($array, function ($left, $right) {
        return filecmp($left, $right);
    });
}
/**
 * Comparison callback that orders two file records by their 'name'.
 *
 * @param array $a first file record
 * @param array $b second file record
 * @return int below 0, 0 or above 0, as returned by strcmp()
 */
function filecmp($a, $b) {
    $leftName = $a['name'];
    $rightName = $b['name'];
    return strcmp($leftName, $rightName);
}
/**
 * Sorts user accounts in place in the order defined by useridcmp(), keeping their keys.
 *
 * @param array $array user accounts to sort
 */
function useridsort(&$array) {
    uasort($array, function ($left, $right) {
        return useridcmp($left, $right);
    });
}
/**
 * Comparison callback for user accounts.
 *
 * Accounts whose ID starts with 'S' come first; after that accounts are
 * ordered by ID (left-padded with zeros to two characters) and then by NAME.
 *
 * @param array $a first account; uses 'ID' and 'NAME'
 * @param array $b second account; uses 'ID' and 'NAME'
 * @return int below 0, 0 or above 0
 */
function useridcmp($a, $b) {
    $aIsS = substr($a['ID'], 0, 1) == 'S';
    $bIsS = substr($b['ID'], 0, 1) == 'S';
    if ($aIsS != $bIsS) {
        return $aIsS ? -1 : 1;
    }

    $byId = strcmp(sprintf("%02s", $a['ID']), sprintf("%02s", $b['ID']));
    if ($byId != 0) {
        return $byId;
    }
    return strcmp($a['NAME'], $b['NAME']);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment