Skip to content

Instantly share code, notes, and snippets.

@jascotty2
Created November 15, 2020 07:54
Show Gist options
  • Save jascotty2/940238522a1decd3dc39be6b64579487 to your computer and use it in GitHub Desktop.
Save jascotty2/940238522a1decd3dc39be6b64579487 to your computer and use it in GitHub Desktop.
<?php
/**
* Copyright (C) 2020 Jacob Scott <jascottytechie@gmail.com>
*
* Description: THEOS Disk Image File Extraction
*
* This program is free software: you can redistribute it and/or modify it under
* the terms of the GNU General Public License as published by the Free Software
* Foundation, either version 3 of the License, or (at your option) any later
* version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program. If not, see <http://www.gnu.org/licenses/>.
*/
// ---- User configuration ----
// Path of the raw disk image to read.
$file = 'disk.img';
// Folder that recovered files (and the log file, when enabled) are written to.
$outFolder = 'recovered';
// Should library files be separated into folders instead of using the dot notation?
$librariesAsFolders = true;
// Owner ids whose files are extracted; files owned by any other id are skipped.
// A null/empty value disables this filter.
// 0 == system files
$userFiles = [0, 4];
// Should zero-length files be skipped?
$skipEmptyFiles = true;
// Examine the files but don't extract them?
$dryRun = false;
// Log files and progress to a file? The log is created inside $outFolder.
$logResults = true;
$logFile = '_recovery_'.basename($file).'.log';
// Local system timezone, required to get the extracted file dates correct.
date_default_timezone_set('America/Chicago');
///////////////////////////////////////////////////////////////////////////
/////////////////////// Begin Program Logic ///////////////////////
///////////////////////////////////////////////////////////////////////////
// MBR partition type ids that identify a THEOS partition
define('DISK_THEOS_2G', 0x38);
define('DISK_THEOS_SPANNED', 0x39);
define('DISK_THEOS_4G', 0x3A);
define('DISK_THEOS_4G_EXTENDED', 0x3B);

// File organisation codes (first byte after the 16-byte name in a file record).
// These look like bit fields, but the meaning of bit 0x20 has not been narrowed down.
define('FILE_PROGRAM_1', 0x01);
define('FILE_INDEXED', 0x02);
define('FILE_RELATIVE', 0x08);
define('FILE_STREAM', 0x10);
define('FILE_PROGRAM', 0x21);
define('FILE_FOLDER', 0x40);
define('FILE_SYS_EXE', 0x41); // VIR.CMD386 & SYSTEM.THEOS32 programs
define('FILE_LIBRARY', 0x80);
define('FILE_MEMBER', 0xC0); // member of a library
define('FILE_DELETED', 0xFF);

// Organisation code => label used in the log output
$types = [
    FILE_PROGRAM_1 => 'PROGRAM',
    FILE_INDEXED   => 'INDEX',
    FILE_RELATIVE  => 'RELATIVE',
    FILE_STREAM    => 'STREAM',
    FILE_PROGRAM   => 'PROGRAM',
    FILE_FOLDER    => 'FOLDER',
    FILE_SYS_EXE   => 'SYS_EXE',
    FILE_LIBRARY   => 'LIBRARY',
    FILE_MEMBER    => 'MEMBER',
    FILE_DELETED   => 'DELETED',
];

// Permission / attribute bit field of a file record
define('PERM_UR', 0x01);
define('PERM_UW', 0x02);
define('PERM_UX', 0x04);
define('PERM_UE', 0x08);
define('PERM_OR', 0x10);
define('PERM_OW', 0x20);
define('ATTR_MODIFIED', 0x40);
define('ATTR_VISIBLE', 0x80);
// THEOS extended ASCII (bytes 129-255) => UTF-8 byte sequence.
// Bytes without a symbol map to the empty string. The entries are kept in
// ascending byte order.
// TODO? Option for converting ASCII files to UTF-8 with these translated codes?
$unmapped = static function ($first, $last) {
    // every byte in [$first, $last] => ''
    return array_fill_keys(array_map('chr', range($first, $last)), '');
};
$characterReplacements =
    // 129-159 have no symbols mapped to them
    $unmapped(0x81, 0x9F)
    // 160-185: box drawing
    + [
        "\xA0" => "\xE2\x94\x8C", //160 ┌
        "\xA1" => "\xE2\x94\x90", //161 ┐
        "\xA2" => "\xE2\x94\x98", //162 ┘
        "\xA3" => "\xE2\x94\x94", //163 └
        "\xA4" => "\xE2\x94\xBC", //164 ┼
        "\xA5" => "\xE2\x94\x9C", //165 ├
        "\xA6" => "\xE2\x94\xA4", //166 ┤
        "\xA7" => "\xE2\x94\xAC", //167 ┬
        "\xA8" => "\xE2\x94\xB4", //168 ┴
        "\xA9" => "\xE2\x94\x80", //169 ─
        "\xAA" => "\xE2\x94\x82", //170 │
        "\xAB" => "\xE2\x95\xAD", //171 ╭
        "\xAC" => "\xE2\x95\xAE", //172 ╮
        "\xAD" => "\xE2\x95\xAF", //173 ╯
        "\xAE" => "\xE2\x95\xB0", //174 ╰
        "\xAF" => "\xE2\x95\x94", //175 ╔
        "\xB0" => "\xE2\x95\x97", //176 ╗
        "\xB1" => "\xE2\x95\x9D", //177 ╝
        "\xB2" => "\xE2\x95\x9A", //178 ╚
        "\xB3" => "\xE2\x95\xAC", //179 ╬
        "\xB4" => "\xE2\x95\xA0", //180 ╠
        "\xB5" => "\xE2\x95\xA3", //181 ╣
        "\xB6" => "\xE2\x95\xA6", //182 ╦
        "\xB7" => "\xE2\x95\xA9", //183 ╩
        "\xB8" => "\xE2\x95\x90", //184 ═
        "\xB9" => "\xE2\x95\x91", //185 ║
    ]
    // no symbols again from 186-191
    + $unmapped(0xBA, 0xBF)
    // 192-239: accented letters, currency and miscellaneous symbols
    + [
        "\xC0" => "\xC3\x84", //192 Ä
        "\xC1" => "\xC3\xA4", //193 ä
        "\xC2" => "\xC3\xA2", //194 â
        "\xC3" => "\xC3\xA0", //195 à
        "\xC4" => "\xC3\xA1", //196 á
        "\xC5" => "\xC3\x89", //197 É
        "\xC6" => "\xC3\xAB", //198 ë
        "\xC7" => "\xC3\xAA", //199 ê
        "\xC8" => "\xC3\xA8", //200 è
        "\xC9" => "\xC3\xA9", //201 é
        "\xCA" => "\xC3\xAF", //202 ï
        "\xCB" => "\xC3\xAE", //203 î
        "\xCC" => "\xC3\xAC", //204 ì
        "\xCD" => "\xC3\xAD", //205 í
        "\xCE" => "\xC3\x96", //206 Ö
        "\xCF" => "\xC3\xB6", //207 ö
        "\xD0" => "\xC3\xB4", //208 ô
        "\xD1" => "\xC3\xB2", //209 ò
        "\xD2" => "\xC3\xB3", //210 ó
        "\xD3" => "\xC3\x9C", //211 Ü
        "\xD4" => "\xC3\xBC", //212 ü
        "\xD5" => "\xC3\xBB", //213 û
        "\xD6" => "\xC3\xB9", //214 ù
        "\xD7" => "\xC3\xBA", //215 ú
        "\xD8" => "\xC3\x87", //216 Ç
        "\xD9" => "\xC3\xA7", //217 ç
        "\xDA" => "\xC3\x91", //218 Ñ
        "\xDB" => "\xC3\xB1", //219 ñ
        "\xDC" => "\xC3\x86", //220 Æ
        "\xDD" => "\xC3\xA6", //221 æ
        "\xDE" => "\xC3\x85", //222 Å
        "\xDF" => "\xC3\xA5", //223 å
        "\xE0" => "\xC3\x9F", //224 ß
        "\xE1" => "\xC2\xBF", //225 ¿
        "\xE2" => "\xC2\xA1", //226 ¡
        "\xE3" => "\xC2\xA2", //227 ¢
        "\xE4" => "\xC2\xA3", //228 £
        "\xE5" => "\xC2\xA5", //229 ¥
        "\xE6" => "\xE2\x82\xA7", //230 ₧ // now-defunct spanish currency (peseta)
        "\xE7" => "\xE2\x82\xAC", //231 €
        "\xE8" => "\xC2\xBC", //232 ¼
        "\xE9" => "\xC2\xBD", //233 ½
        "\xEA" => "\xC3\xBF", //234 ÿ
        "\xEB" => "\xC2\xA7", //235 §
        "\xEC" => "\xE2\x97\xBC", //236 ◼
        "\xED" => "\xC2\xB2", //237 ²
        "\xEE" => "\xC2\xA9", //238 ©
        "\xEF" => "\xC2\xAE", //239 ®
    ]
    // no more characters used after 239
    + $unmapped(0xF0, 0xFF);
unset($unmapped);
// The image is only ever read, so open it read-only: this also works for
// write-protected images and guarantees the source cannot be modified.
$fh = fopen($file, 'rb');
if (!$fh) die('failed to open file: '.$file);
// check for the boot sector signature (last two bytes of the first 512-byte sector)
fseek($fh, 510, SEEK_SET);
$mbr = fread($fh, 2);
if ($mbr !== "\x55\xAA") die('file is not a disk image');
// output folder
// NOTE: a dry run without logging does not create the folder, but still
// requires it to exist already.
if (($logResults || !$dryRun) && !file_exists($outFolder)) {
    // 0777 (further restricted by the umask); the mode must be written as an
    // octal literal with a leading zero, otherwise the owner bits are lost.
    mkdir($outFolder, 0777, true) or die("Failed to create output directory: $outFolder\n");
} else if (!is_dir($outFolder)) {
    die("Failed to create output directory: $outFolder\n");
}
// $flog is falsy when logging is disabled or the log could not be opened;
// every log write below is guarded with "$flog &&".
$flog = $logResults ? fopen($outFolder.'/'.$logFile, 'w') : null;
$flog && fputs($flog, "Reading $file\n");
$partitionStart = 0;
$partitions = read_mbr($fh);
if (($c = count($partitions)) != 0) {
    // MBR with partition data
    $partitionStart = $partitions[0]['offset'];
    // just guessing an HD is a system disk
    $systemDisk = true;
    foreach ($partitions as $part) {
        echo "Disk partition detected: {$part['type-name']} at {$part['offset']} with {$part['sectors']} sectors\n";
        $flog && fputs($flog, "Disk partition detected: {$part['type-name']} at {$part['offset']} with {$part['sectors']} sectors\n");
    }
    if ($c != 1) {
        // (My disk only has one partition)
        echo "Running recovery for first partition only\n";
        $flog && fputs($flog, "Running recovery for first partition only\n");
    }
    //$nullPad = "\xE6\x6E"; // hard drive uses a different fill byte pattern
} else {
    // floppy disks don't have a partition table
    $systemDisk = false;
    //$nullPad = "\xE5";
}
// the partition definition record sits 1024 bytes into the partition
fseek($fh, 1024 + $partitionStart, SEEK_SET);
$partitionData = read_partitionrec($fh);
if ($partitionData['indexTableSize'] == 0) die ('Invalid Partition Definition');
$indexFiles = $partitionData['indexTableSize'];
echo "Reading disk image partition {$partitionData['label']} from offset $partitionStart\n";
$flog && fputs($flog, "Disk Label: {$partitionData['label']}\n".
    "{$partitionData['cylinders']} cylinders, {$partitionData['heads']} heads, {$partitionData['sectors']} sectors\n".
    //"Using ".(strtoupper(bin2hex($nullPad)))." as NULL\n".
    "Loading $indexFiles file records starting at ".(1024 + $partitionStart)."\n");
// Build the file table, beginning with the records of the main index.
// The main index follows the partition definition directly (256 bytes later).
fseek($fh, 1024 + $partitionStart + 256, SEEK_SET);
$fs_files = [];
// read_filerec() consumes the stream sequentially, so every index slot has to
// be read before anything below seeks elsewhere in the image.
for ($slot = 0; $slot < $indexFiles; ++$slot) {
    $rec = read_filerec($fh);
    if ($rec === null) {
        continue; // unused slot
    }
    // extract libraries into their own directory
    if ($librariesAsFolders && $rec['type'] == FILE_LIBRARY) {
        $rec['name'] = str_replace('.', '/', $rec['name']);
    }
    $fs_files[] = $rec;
}
$mainFound = count($fs_files);
// After the index come 1 or 2 blocks of data. From the documentation these seem
// to be a free space map block plus an optional bad sector map block
// (1 block on a floppy, 2 on a disk; the 2nd segment on disk isn't filled with
// any data after #11). They appear to be organized in 10-byte segments.
// Per System Reference pg 751 (File System), the layout of these blocks and of
// the disk volume label block is described in a VLB.H file that ships with the
// "THEOS C" addon (not included in the copy I have).

// load extended fragment data
for ($n = 0; $n < $mainFound; ++$n) {
    $hasExtended = $fs_files[$n]['extended1'] != 0 || $fs_files[$n]['extended2'] != 0;
    if ($hasExtended) {
        read_filerecExtended($fh, $partitionStart, $fs_files[$n]);
    }
}
$flog && fputs($flog, "$mainFound Files loaded from main directory index\n");
// Folders & library files store further file records in their own data: the
// 'file' content is a binary stream in the same record format.
// load_subfiles() appends to $fs_files, so only the first $mainFound entries
// are visited here.
for ($n = 0; $n < $mainFound; ++$n) {
    $kind = $fs_files[$n]['type'];
    if ($kind == FILE_LIBRARY || $kind == FILE_FOLDER) {
        load_subfiles($fh, $partitionStart, $fs_files[$n], $fs_files, $librariesAsFolders);
    }
}
$flog && fputs($flog, (count($fs_files) - $mainFound)." Files loaded from folders and libraries\n");
// output file table
if ($flog) {
    // Account names only exist on system disks. load_users() returns null when
    // the account file cannot be read completely, so the user table is only
    // printed when an array came back.
    $users = $systemDisk ? load_users($fh, $fs_files, $partitionStart) : null;
    if ($systemDisk && is_array($users)) {
        fputs($flog, "\nSystem Users:\n");
        useridsort($users);
        fputs($flog, "┌────────┬──┬────┬────┬────────────────┬────────────────┬────────────────┬────────────────────────┐\n");
        fputs($flog, "│Username│ID│Priv│Mail│ Path │ Prompt │ Library │ Password Bytes │\n");
        fputs($flog, "├────────┼──┼────┼────┼────────────────┼────────────────┼────────────────┼────────────────────────┤\n");
        foreach ($users as $user) {
            fputs($flog, sprintf("│%-8s│%2s│%4d│%4s│%-16s│%-16s│%-16s│%24s│\n",
                $user['NAME'], explode(' ', $user['ID'] ?? '')[0], $user['PRIV'] ?? 0, $user['MAIL'] ?? 'NO',
                $user['PATH'] ?? '', $user['PROMPT'] ?? '', $user['LIBRARY'] ?? '',
                isset($user['PASSWORD']) ? strtoupper(implode(' ', str_split(bin2hex($user['PASSWORD']), 2))) : ''));
        }
        fputs($flog, "└────────┴──┴────┴────┴────────────────┴────────────────┴────────────────┴────────────────────────┘\n");
    }

    // Writes the three header lines shared by every file table.
    $writeTableHead = static function () use ($flog) {
        fputs($flog, "┌────────┬────────┬────────┬─┬────────┬────────┬─────┬─────────┬────────┬────┬────┬────────┬────┬──────────┬────┬──────────┐\n");
        fputs($flog, "│Filename│Filetype│Membname│:│ User │ Date │Time │ Org. │ Size │Recl│Keyl│ Perms │Grow│ Position │Frag│Index Pos │\n");
        fputs($flog, "├────────┼────────┼────────┼─┼────────┼────────┼─────┼─────────┼────────┼────┼────┼────────┼────┼──────────┼────┼──────────┤\n");
    };
    // Writes the closing line of a file table.
    $writeTableFoot = static function () use ($flog) {
        fputs($flog, "└────────┴────────┴────────┴─┴────────┴────────┴─────┴─────────┴────────┴────┴────┴────────┴────┴──────────┴────┴──────────┘\n");
    };
    // Writes one table row for $entry; the three name columns are passed in
    // because member rows show their container's name in the first two.
    $writeRow = static function (array $entry, $name1, $name2, $member) use ($flog, $types, $users, $partitionStart) {
        $recl = $keyl = '';
        if ($entry['type'] != FILE_DELETED && ($entry['type'] & 1)) {
            // program-type files: show both lengths as 4-digit hex
            $recl = sprintf("%04s", strtoupper(dechex($entry['reclen'])));
            $keyl = sprintf("%04s", strtoupper(dechex($entry['keylen'])));
        } else if ($entry['type'] == FILE_RELATIVE) {
            $recl = $entry['reclen'];
        } else if ($entry['type'] == FILE_LIBRARY) {
            // a library's index holds one 64-byte record per member slot
            $recl = $entry['filesize'] / 64;
        } else if ($entry['type'] == FILE_INDEXED) {
            $recl = $entry['reclen'];
            $keyl = $entry['keylen'];
        }
        // owner name when the account list is known, otherwise the raw id
        if (!$users) {
            $owner = sprintf("%8s", $entry['user']);
        } else {
            $owner = isset($users[$entry['user']]) ? $users[$entry['user']]['NAME'] : '??';
        }
        fputs($flog, sprintf("│%-8s│%-8s│%-8s│%s│%-8s│%02d/%02d/%02d│%02d:%02d│%-9s│%8d│%4s│%4s│%8s│%4s│%10d│%4s│%10d│\n",
            $name1, $name2, $member,
            $partitionStart == 0 ? 'F' : 'S', // just simulating the drive letter :)
            $owner,
            $entry['modified_month'], $entry['modified_day'], substr($entry['modified_year'], 2),
            $entry['modified_hour'], $entry['modified_minute'],
            $types[$entry['type']] ?? ''.$entry['type'],
            $entry['filesize'],
            $recl, $keyl,
            theosPerms($entry), $entry['grow'],
            $entry['offset'] + $partitionStart, count($entry['fileparts']), $entry['def_pos']));
    };

    fputs($flog, "\nMain File Table, $mainFound / $indexFiles Files:\n");
    $writeTableHead();
    $main = array_slice($fs_files, 0, $mainFound);
    filesort($main);
    // libraries/folders still to be listed; grows while nested ones are found
    $extraFiles = [];
    foreach ($main as $entry) {
        if ($entry['type'] == FILE_LIBRARY || $entry['type'] == FILE_FOLDER) {
            $extraFiles[] = $entry;
        }
        $writeRow($entry, $entry['name1'], $entry['name2'], '');
    }
    $writeTableFoot();

    // one table per library/folder, breadth first
    for ($i = 0; $i < count($extraFiles); ++$i) {
        $container = $extraFiles[$i];
        $count = $container['count'] ?? 0;
        $max = $container['filesize'] / 64;
        fputs($flog, "\n{$container['name']} File Table, $count / $max Files:\n");
        $writeTableHead();
        if ($count > 0) {
            $index = $container['members'];
            // same ordering rule as the main table (byte-wise by name)
            usort($index, 'filecmp');
            foreach ($index as $entry) {
                if ($entry['type'] == FILE_LIBRARY || $entry['type'] == FILE_FOLDER) {
                    $extraFiles[] = $entry;
                }
                $writeRow($entry, $container['name1'], $container['name2'], $entry['name1']);
            }
        }
        $writeTableFoot();
    }
}
// UTC offset of the configured timezone in "+hhmm" form. It is appended to
// every stored timestamp so that strtotime() gets the extracted file dates right.
// date('O') also yields a valid offset for zones that are not a whole number
// of hours from UTC (e.g. +0530).
$utc_offset = date('O');
if (!$dryRun) {
    // create folders for extracted data
    foreach ($fs_files as $entry) {
        $isLibrary = $entry['type'] == FILE_LIBRARY;
        // extract libraries into their own directory: NAME.TYPE became
        // NAME/TYPE, so the NAME part needs a directory as well
        if ($librariesAsFolders && $isLibrary) {
            $slash = strrpos($entry['name'], '/');
            if ($slash !== false) {
                $dir = $outFolder.'/'.substr($entry['name'], 0, $slash);
                if (!file_exists($dir)) {
                    // recursive: members of nested folders are listed in
                    // $fs_files before the nested folder itself
                    mkdir($dir, 0777, true);
                    write_filetime($dir, $entry, $utc_offset);
                }
            }
        }
        if ($entry['type'] == FILE_FOLDER || $isLibrary) {
            $dir = $outFolder.'/'.$entry['name'];
            if (!file_exists($dir)) {
                mkdir($dir, 0777, true);
                write_filetime($dir, $entry, $utc_offset);
            }
        }
    }
}
// extract data
// names already handled, used as a set (name => true) for duplicate detection
$saved = [];
foreach ($fs_files as $file) {
    $name = $file['name'];
    // check user bit
    if ($userFiles != null && !in_array($file['user'], $userFiles)) {
        echo "Skipping {$name} (Not in users list: {$file['user']})\n";
        continue;
    } else if ($file['type'] == FILE_DELETED) {
        echo "Skipping {$name} (Deleted File)\n";
        continue;
    } else if (isset($saved[$name])) {
        echo "Skipping {$name} (Duplicate File)\n";
        continue;
    } else if ($skipEmptyFiles && $file['filesize'] == 0) {
        echo "Skipping {$name} (0-Length File)\n";
        continue;
    }
    $saved[$name] = true;
    // folders and libraries were created as directories; nothing to extract
    if ($file['type'] == FILE_LIBRARY || $file['type'] == FILE_FOLDER) {
        continue;
    }
    $type = $types[$file['type']] ?? ''.$file['type'];
    $fn = $outFolder.'/'.$name;
    if ($file['allocated'] == 0) {
        echo "Empty File {$name}\n";
        $flog && fputs($flog, "Empty File {$name}\n");
        // a dry run must not create anything on disk
        if (!$dryRun) {
            touch($fn);
            write_filetime($fn, $file, $utc_offset);
        }
        continue;
    }
    $flog && fputs($flog, sprintf("Extracting: %-30s %12s %10u < %9u b %5u r %5u k (%12u)\n",
        $name, $type, $file['offset'] + $partitionStart, $file['filesize'], $file['reclen'], $file['keylen'], $file['def_pos']));
    // extract this file; read_file() returns null when nothing could be read
    $bytes = read_file($fh, $file, $partitionStart);
    if (!is_string($bytes) || strlen($bytes) < $file['filesize']) {
        echo "\n!!!! BAD FILE - Missing data for {$name} !!!!\n\n";
        $flog && fputs($flog, "\n!!!! BAD FILE - Missing data for {$name} !!!!\n\n");
        continue;
    }
    $sections = count($file['fileparts']);
    echo "Extracting {$name}: {$file['allocated']} allocated bytes, {$file['filesize']} bytes long, $sections parts\n";
    if (!$dryRun) {
        $f = fopen($fn, 'wb');
        if (!$f) {
            // report and carry on with the remaining files
            echo "\n!!!! Unable to write {$fn} !!!!\n\n";
            $flog && fputs($flog, "\n!!!! Unable to write {$fn} !!!!\n\n");
            continue;
        }
        fwrite($f, $bytes);
        fclose($f);
        write_filetime($fn, $file, $utc_offset);
    }
}
// All done!
fclose($fh);
$flog && fclose($flog);
exit;
//////////////////////////////////////////////////////////////////////////
///////////////////// Begin Function Definitions /////////////////////
//////////////////////////////////////////////////////////////////////////
/*
File User modified File Rec Key Alloc Block Alloc Block Alloc Block Alloc Block Alloc Block Extra1 Extra2 Growth
File/Member Name File Extension Typ|Owner|Perm|date time| | Size | |Len| |Len| |Byt| |Offset| | #2| |Off #2| | #3| |Off #3| | #4| |Off #4| | #5| |Off #5| |Offset| |Offset| Percentage
|---------------------| |---------------------|\-|\----|\--/|---| |---| |---------| |---| |---| |---| |------| |---| |------| |---| |------| |---| |------| |---| |------| |------| |------| |-/
41 43 43 4F 55 4E 54 20 00 00 00 00 00 00 00 00 10 00 00 7F 6A AE 50 1F CD 13 00 00 00 00 00 00 01 00 5A 9A 00 10 00 59 CE 00 01 00 7A 1D 1A 01 00 A5 D2 00 01 00 EF 26 06 00 00 00 00 00 00 00
*/
/**
 * Loads the file records stored inside a folder or library.
 *
 * Per the documentation, folder indexes are streams and may grow (and
 * fragment) like any other file, and folders may be nested. The index is
 * therefore read through read_file() as a whole file, and every 64-byte record
 * in it is parsed - not only those in the first fragment.
 *
 * @param resource $fh                 open disk image
 * @param int      $partitionStart     byte offset of the partition in the image
 * @param array    $file               folder/library record; receives 'count' and 'members'
 * @param array    $records            flat list of all records; the loaded members are appended
 * @param bool     $librariesAsFolders turn the '.' in library names into '/'
 */
function load_subfiles($fh, $partitionStart, &$file, &$records, $librariesAsFolders) {
    // first load in the index:
    $index = read_file($fh, $file, $partitionStart);
    $indexLength = is_string($index) ? strlen($index) : 0;

    // Fragment bookkeeping, used only to report where on disk a record lives.
    $parts = $file['fileparts'];
    $lastPart = count($parts) - 1;
    $part = 0;
    $partStart = 0; // index position at which the current fragment begins
    $partEnd = $lastPart >= 0 ? $parts[0]['allocated'] : 0; // ... and ends

    $files = [];
    // the 'file' is the file table to load, 64 bytes at a time:
    for ($indexPos = 0; $indexPos < $indexLength; $indexPos += 64) {
        // advance to the fragment that contains this record
        while ($indexPos >= $partEnd && $part < $lastPart) {
            ++$part;
            $partStart = $partEnd;
            $partEnd += $parts[$part]['allocated'];
        }
        // read the file record
        if (($rec = read_filerec_file($index, $indexPos)) === null) {
            continue; // unused slot
        }
        $rec['name'] = $file['name'].'/'.$rec['name'];
        // absolute position of the record within the disk image
        $rec['def_pos'] = $parts[$part]['offset'] + $partitionStart + ($indexPos - $partStart);
        if ($librariesAsFolders && $rec['type'] == FILE_LIBRARY) {
            $rec['name'] = str_replace('.', '/', $rec['name']);
        }
        if ($rec['extended1'] != 0 || $rec['extended2'] != 0) {
            read_filerecExtended($fh, $partitionStart, $rec);
        }
        $files[] = $rec;
    }
    $file['count'] = count($files);
    // now run recursive:
    for ($i = 0; $i < $file['count']; ++$i) {
        if ($files[$i]['type'] == FILE_LIBRARY || $files[$i]['type'] == FILE_FOLDER) {
            load_subfiles($fh, $partitionStart, $files[$i], $records, $librariesAsFolders);
        }
    }
    // save library/folder members:
    $file['members'] = $files;
    // append to the main array for easy extraction
    $records = array_merge($records, $files);
}
/**
 * Reads the next 64-byte file record from the current stream position.
 *
 * @param resource $fh open disk image, positioned at a record
 * @return array|null parsed record with 'def_pos' (stream offset of the
 *                    record) added, or null when the slot has no file name
 */
function read_filerec($fh) {
    $raw = fread($fh, 64);
    $parsed = read_filerec_file($raw, 0);
    if ($parsed == null) {
        return null;
    }
    // the stream now points just past the record
    $parsed['def_pos'] = ftell($fh) - 64;
    return $parsed;
}
/**
 * Parses one 64-byte file record out of a binary string.
 *
 * Layout relative to $offset: 8 bytes name, 8 bytes extension, then type (1),
 * owner (2), permissions (1), packed date (4, big-endian), size (4),
 * record length (2), key length (2), five 5-byte fragment descriptors,
 * two 3-byte extended block pointers and one growth byte.
 *
 * @param string $attr   binary data holding the record
 * @param int    $offset position of the record inside $attr
 * @return array|null the parsed record, or null when the name is blank
 */
function read_filerec_file(&$attr, $offset) {
    // names are space padded; '$' is swapped for '_' in extracted names
    $cleanName = static function ($raw) {
        return trim(str_replace('$', '_', $raw));
    };
    $name1 = $cleanName(substr($attr, $offset + 0, 8));
    $name2 = $cleanName(substr($attr, $offset + 8, 8));
    if ($name1 == '') {
        return null;
    }
    $name = $name2 == '' ? $name1 : $name1.'.'.$name2;

    // everything below is addressed relative to the end of the name fields
    $base = $offset + 16;

    // Dates are stored as a bit stream - and, unlike every other value in the
    // index table, big-endian. Render the 32 bits as text and slice the fields.
    $stamp = sprintf('%032b', hexDecE($attr, $base + 4, 4, true));
    $field = static function ($from, $width) use ($stamp) {
        return bindec(substr($stamp, $from, $width));
    };
    // bits 15-31 together are the time, but read as one number it increases
    // faster than 1 per second at an irregular interval

    // up to 107 fragments are supported by the filesystem, the extra ones are
    // located in two optional 256b blocks (max filesize of 1.67 GiB)
    $fragments = getParts($attr, $base + 16, 5);

    // growth is stored as a float in two nibble integers
    $growByte = hexDecE($attr, $base + 47, 1, false);

    return [
        'name' => $name,
        'name1' => $name1,
        'name2' => $name2,
        // file type is the first byte after the name
        'type' => hexDecE($attr, $base + 0, 1, false),
        'user' => hexDecE($attr, $base + 1, 2, false),
        // record & key size (indexes + libraries + relative), little-endian
        'reclen' => hexDecE($attr, $base + 12, 2, false),
        'keylen' => hexDecE($attr, $base + 14, 2, false),
        // max value of 1798134720 (6B 2D 5B C0) according to the documentation;
        // so far only values up to 393210 (05 FF FA) have been seen
        'filesize' => hexDecE($attr, $base + 8, 4, false),
        'allocated' => $fragments[0]['allocated'] ?? 0,
        'offset' => $fragments[0]['offset'] ?? 0,
        'fileparts' => $fragments,
        'extended1' => hexDecE($attr, $base + 41, 3, false) * 256,
        'extended2' => hexDecE($attr, $base + 44, 3, false) * 256,
        'perms' => hexDecE($attr, $base + 3, 1, false),
        'grow' => (($growByte & 0xF0) >> 4).'.'.($growByte & 0x0F),
        // year is simply the number of years since 1986
        'modified_year' => 1986 + $field(0, 6),
        'modified_month' => $field(6, 4),
        'modified_day' => $field(10, 5),
        'modified_hour' => $field(15, 5),
        'modified_minute' => $field(20, 6),
        'modified_second' => $field(26, 6),
    ];
}
/**
 * Appends the fragments listed in a record's extended blocks to 'fileparts'.
 *
 * Each non-zero 'extended1'/'extended2' value is the byte offset (relative to
 * the partition) of a 256-byte block holding up to 51 fragment descriptors.
 *
 * @param resource $fh             open disk image
 * @param int      $partitionStart byte offset of the partition in the image
 * @param array    $file           file record, updated in place
 */
function read_filerecExtended($fh, $partitionStart, &$file) {
    foreach (['extended1', 'extended2'] as $pointer) {
        if ($file[$pointer] == 0) {
            continue;
        }
        fseek($fh, $file[$pointer] + $partitionStart, SEEK_SET);
        $block = fread($fh, 256);
        $file['fileparts'] = array_merge($file['fileparts'], getParts($block, 0, 51));
    }
}
/**
 * Decodes up to $max consecutive 5-byte fragment descriptors.
 *
 * A descriptor is a 2-byte block count followed by a 3-byte block number
 * (both little-endian, in 256-byte blocks). Descriptors whose offset is 0 are
 * left out of the result.
 *
 * @param string $attr  binary data
 * @param int    $start position of the first descriptor
 * @param int    $max   number of descriptor slots to examine
 * @return array list of ['allocated' => bytes, 'offset' => bytes]
 */
function getParts(&$attr, $start, $max) {
    $found = [];
    $end = $start + 5 * $max;
    for ($pos = $start; $pos < $end; $pos += 5) {
        // absolute offset is relative to the start of the disk partition
        $where = hexDecE($attr, $pos + 2, 3, false) * 256;
        if ($where == 0) {
            continue;
        }
        $found[] = [
            'allocated' => hexDecE($attr, $pos, 2, false) * 256,
            'offset' => $where,
        ];
    }
    return $found;
}
/**
 * Reads a file's content by concatenating all of its fragments.
 *
 * @param resource $fh             open disk image
 * @param array    $file           record with 'fileparts' and 'filesize'
 * @param int      $partitionStart byte offset of the partition in the image
 * @return string|null content cut to 'filesize' bytes, or null when no data
 *                     could be read
 */
function read_file($fh, $file, $partitionStart) {
    $content = '';
    foreach ($file['fileparts'] as $fragment) {
        if ($fragment['allocated'] == 0) {
            continue; // nothing reserved for this fragment
        }
        fseek($fh, $fragment['offset'] + $partitionStart, SEEK_SET);
        // a failed read contributes nothing
        $content .= fread($fh, $fragment['allocated']);
    }
    if ($content === '') {
        return null;
    }
    return substr($content, 0, $file['filesize']);
}
/**
 * Reads the 64-byte partition definition record at the current position.
 *
 * Only the label, index size and geometry are understood; the "restore"
 * fields are guesses and the rest of the record is not interpreted.
 *
 * @param resource $fh open disk image, positioned at the record
 * @return array decoded fields plus 'raw' (hex dump of the record)
 */
function read_partitionrec($fh) {
    $attr = fread($fh, 64);

    // also a guess: a packed date similar to the one in file records
    $restoreBits = sprintf('%032b', hexDecE($attr, 17, 4, true));

    return [
        'label' => theosStr(trim(substr($attr, 0, 8))),
        // alloc index is number of 256-byte blocks, but they store 4 records per block
        'indexTableSize' => hexDecE($attr, 20, 2, false) * 4,
        'cylinders' => hexDecE($attr, 22, 2, false),
        'heads' => hexDecE($attr, 24, 1, false),
        'sectors' => hexDecE($attr, 25, 1, false),
        'restoreMedia' => hexDecE($attr, 8, 1, false), // just a guess
        'restoreLabel' => theosStr(trim(substr($attr, 9, 8))),
        'restore_year' => 1995 + bindec(substr($restoreBits, 1, 6)),
        'restore_month' => bindec(substr($restoreBits, 7, 4)),
        'restore_day' => bindec(substr($restoreBits, 11, 5)),
        // no idea how to read the rest of the entry..
        'raw' => bin2hex($attr),
    ];
}
/**
 * Reads the four partition table entries of a generic MBR.
 *
 * @param resource $fh open disk image
 * @return array list of the THEOS partitions found (may be empty)
 */
function read_mbr($fh) {
    // the bootstrap code takes up the first 446 bytes
    fseek($fh, 446, SEEK_SET);
    $found = [];
    $slot = 0;
    while ($slot++ < 4) {
        // entries are consecutive, so each call consumes the next one
        $entry = read_mbr_partition($fh);
        if ($entry != null) {
            $found[] = $entry;
        }
    }
    return $found;
}
/**
 * Reads one 16-byte MBR partition entry from the current stream position.
 *
 * @param resource $fh open disk image, positioned at a partition entry
 * @return array|null the decoded entry, or null when the status byte is
 *                    neither 0x80 nor 0x00 or the type is not a THEOS one
 */
function read_mbr_partition($fh) {
    $rec = fread($fh, 16);

    // First byte must be a 0x80 (Active) or a 0x00 (Inactive); anything else
    // is not one of the older entry styles and is ignored.
    $flag = substr($rec, 0, 1);
    if ($flag !== "\x80" && $flag !== "\x00") {
        return null;
    }

    // we are only looking for THEOS disks
    $names = [
        DISK_THEOS_2G => 'THEOS v3.2 2G',
        DISK_THEOS_SPANNED => 'THEOS v4 Spanned',
        DISK_THEOS_4G => 'THEOS v4 4G',
        DISK_THEOS_4G_EXTENDED => 'THEOS v4 4G Extended',
    ];
    $type = hexDecE($rec, 4, 1, false);
    if (!isset($names[$type])) {
        return null;
    }

    // Decodes a head/sector/cylinder triple starting at byte $at.
    // The sector byte carries the 2 high bits of the cylinder.
    $chs = static function ($at) use (&$rec) {
        $head = hexDecE($rec, $at, 1, false);
        $sector = hexDecE($rec, $at + 1, 1, false);
        $cylinder = hexDecE($rec, $at + 2, 1, false) | (($sector & 192) << 2);
        return [$head, $sector & 63, $cylinder];
    };
    $first = $chs(1);
    // last sector (probably will be all FF for a single partition disk)
    $last = $chs(5);

    return [
        'chs-start-head' => $first[0],
        'chs-start-sector' => $first[1],
        'chs-start-cylinder' => $first[2],
        'chs-end-head' => $last[0],
        'chs-end-sector' => $last[1],
        'chs-end-cylinder' => $last[2],
        // logical block of the first sector (little-endian), as a byte offset
        'offset' => hexDecE($rec, 8, 4, false) * 512,
        // number of sectors in the partition
        'sectors' => hexDecE($rec, 12, 4, false),
        'type' => $type,
        'type-name' => $names[$type],
    ];
}
/**
 * Loads the user accounts from the SYSTEM.THEOS32.ACCOUNT (v4 format) file.
 *
 * The file is a list of CR-separated lines: "ACCOUNT=<name>" starts an
 * account and each following "<TAB>KEY=value" line adds one of its fields.
 *
 * @param resource $fh             open disk image
 * @param array    $fs_files       all loaded file records
 * @param int      $partitionStart byte offset of the partition in the image
 * @return array|null accounts keyed by their ID field (duplicates get
 *                    "<ID> <n>" as key); an empty array when there is no
 *                    account file; null when the file could not be read fully
 */
function load_users($fh, $fs_files, $partitionStart) {
    $users = [];

    // locate the account file; only the first match is used
    $accountFile = null;
    foreach ($fs_files as $candidate) {
        if ($candidate['name'] == 'SYSTEM/THEOS32/ACCOUNT') {
            $accountFile = $candidate;
            break;
        }
    }
    if ($accountFile === null) {
        return $users;
    }

    $bytes = read_file($fh, $accountFile, $partitionStart);
    if ($bytes == null || strlen($bytes) < $accountFile['filesize']) {
        return null;
    }

    // Stores a finished account. (IDs are not unique, so a clashing ID gets
    // the current account count appended to its key.)
    $store = static function ($account) use (&$users) {
        if ($account == null) {
            return;
        }
        $key = isset($users[$account['ID']])
            ? $account['ID'].' '.count($users)
            : $account['ID'];
        $users[$key] = $account;
    };

    $current = null;
    foreach (explode(chr(13), $bytes) as $line) {
        if (substr($line, 0, 8) == 'ACCOUNT=') {
            // a new account begins: flush the previous one
            $store($current);
            $current = ['NAME' => substr($line, 8)];
            continue;
        }
        if (substr($line, 0, 1) == chr(9) && ($eq = strpos($line, '=')) !== false) {
            $current[substr($line, 1, $eq - 1)] = substr($line, $eq + 1);
        }
    }
    // the last account has no following ACCOUNT= line to flush it
    $store($current);

    return $users;
}
/**
 * Decodes $length bytes starting at $start as an unsigned integer.
 *
 * @param string|array $input     binary string, or array of single-byte strings
 * @param int          $start     index of the first byte
 * @param int          $length    number of bytes
 * @param bool         $bigEndian true when the most significant byte comes first
 * @return int|float the decoded number (hexdec() yields a float when the
 *                   value does not fit an int)
 */
function hexDecE(&$input, $start, $length, $bigEndian) {
    if (is_array($input)) {
        $slice = array_slice($input, $start, $length);
        $raw = implode($bigEndian ? $slice : array_reverse($slice));
    } else {
        $slice = substr($input, $start, $length);
        $raw = $bigEndian ? $slice : strrev($slice);
    }
    // $raw is now most-significant-byte first
    return hexdec(bin2hex($raw));
}
/**
 * Renders a string as space-separated groups of 8 binary digits, one group
 * per byte (debugging aid).
 *
 * @param string $str binary data
 * @return string e.g. "01000001 01000010" for "AB"
 */
function strToBin($str) {
    $groups = array_map(
        static function ($byte) {
            return sprintf('%08b', ord($byte));
        },
        str_split($str)
    );
    return implode(' ', $groups);
}
/**
 * Formats a record's permission byte as an 8-character flag string.
 *
 * Mostly follows the THEOS file permission print structure, except that THEOS
 * shares the first position between M and H; here they get one column each.
 * Column order: hidden, modified, then W R E X W R.
 *
 * @param array $file file record
 * @return string the flags, or '' when the record has no 'perms' entry
 */
function theosPerms($file) {
    if (!isset($file['perms'])) {
        return '';
    }
    $bits = $file['perms'];
    // [mask, character when set, character when clear], left to right
    $columns = [
        [ATTR_VISIBLE, '.', 'H'],
        [ATTR_MODIFIED, 'M', '.'],
        [PERM_OW, 'W', '.'],
        [PERM_OR, 'R', '.'],
        [PERM_UE, 'E', '.'],
        [PERM_UX, 'X', '.'],
        [PERM_UW, 'W', '.'],
        [PERM_UR, 'R', '.'],
    ];
    $out = '';
    foreach ($columns as $column) {
        $out .= ($bits & $column[0]) ? $column[1] : $column[2];
    }
    return $out;
}
/**
 * Converts THEOS extended-ASCII text to UTF-8 using $characterReplacements.
 *
 * The conversion has to be a single pass: the UTF-8 sequences produced for one
 * byte contain bytes (0xC2, 0xC3, 0xE2, ...) that are table keys themselves,
 * so a sequential search/replace would translate its own output again and
 * corrupt it. strtr() never re-scans text it has already replaced.
 *
 * @param string $str text in the THEOS character set
 * @return string the text with every mapped byte replaced
 */
function theosStr($str) {
    global $characterReplacements;
    if (!is_array($characterReplacements) || !$characterReplacements) {
        // no table available: nothing to translate
        return $str;
    }
    return strtr($str, $characterReplacements);
}
/**
 * Sets the modification time of $filename to the timestamp stored in a record.
 *
 * @param string $filename   path to touch
 * @param array  $file       record providing the modified_* fields
 * @param string $utc_offset offset suffix (e.g. "-0600") appended to the date
 */
function write_filetime($filename, $file, $utc_offset) {
    $date = sprintf('%04d-%02d-%02d',
        $file['modified_year'], $file['modified_month'], $file['modified_day']);
    $time = sprintf('%02d:%02d:%02d',
        $file['modified_hour'], $file['modified_minute'], $file['modified_second']);
    // ISO 8601 style: date, 'T', time, offset
    $stamp = strtotime($date.'T'.$time.$utc_offset);
    touch($filename, $stamp);
}
/**
 * Sorts file records by name in place, keeping their array keys.
 *
 * @param array $array file records
 */
function filesort(&$array) {
    $comparator = 'filecmp';
    uasort($array, $comparator);
}
/**
 * Comparison callback: orders two file records by 'name' (byte-wise).
 *
 * @param array $a first record
 * @param array $b second record
 * @return int negative, zero or positive, as strcmp()
 */
function filecmp($a, $b) {
    $left = $a['name'];
    $right = $b['name'];
    return strcmp($left, $right);
}
/**
 * Sorts user accounts in place with useridcmp(), keeping their array keys.
 *
 * @param array $array user accounts
 */
function useridsort(&$array) {
    $comparator = 'useridcmp';
    uasort($array, $comparator);
}
/**
 * Comparison callback for user accounts: accounts whose ID starts with 'S'
 * come first, then by ID (left-padded with zeros to two characters), then by
 * NAME.
 *
 * @param array $a first account
 * @param array $b second account
 * @return int negative, zero or positive
 */
function useridcmp($a, $b) {
    $aIsS = substr($a['ID'], 0, 1) == 'S';
    $bIsS = substr($b['ID'], 0, 1) == 'S';
    if ($aIsS != $bIsS) {
        return $aIsS ? -1 : 1;
    }
    $byId = strcmp(sprintf("%02s", $a['ID']), sprintf("%02s", $b['ID']));
    if ($byId != 0) {
        return $byId;
    }
    return strcmp($a['NAME'], $b['NAME']);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment