Last active
April 9, 2024 20:10
WebTV tellyscript decompressor and de-tokenizer
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * WebTV tellyscript decompressor and de-tokenizer.
 *
 * Usage: detok input.tok output.txt
 *
 * The output file will be overwritten if it exists.
 *
 * **************************************************************************
 *
 * Tellyscript is a C-like language invented by Andy Rubin for use at WebTV.
 * The language is missing a few C constructs that are difficult to handle
 * with a simple parser, such as "switch".  Every multi-character keyword
 * and operator is converted to a single-byte token, which makes it compact
 * and easy to parse.
 *
 * When generating tellyscripts in the WebTV service, the source files are
 * assembled from parts, minified (function and variable names are remapped
 * to 1- or 2-letter names), tokenized, and then compressed with LZSS
 * before being delivered to the device.  The tokenization process strips
 * away comments and most whitespace, but leaves EOL markers intact so that
 * error messages can report the correct line number.
 *
 * This tool decompresses the data and reverses the tokenization process.
 * Whitespace is reintroduced to make the code more readable.
 *
 * **************************************************************************
 *
 * v1.0.0 2021/06/19 fadden
 *  - Feels right, but there's no way to know for sure.
 * v1.1.0 2021/06/29 fadden
 *  - Identified timestamp and decompressed data CRC fields.
 *
 * **************************************************************************
 *
 * This was developed by reverse-engineering the file format, based on some
 * vague 25-year-old memories.  I do not have access to the original sources.
 * There was no equivalent tool at WebTV, because the only thing that ever
 * needed to parse these files was the box itself.
 *
 * This is free and unencumbered software released into the public domain.
 * For details, visit https://unlicense.org/.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
// Set to true for extra diagnostic output: each output line is prefixed with
// its offset in the token stream, and the decompressed tokens are dumped to
// DEBUG_TOK_FILENAME.
const int DEBUG = false;
const char DEBUG_TOK_FILENAME[] = "debug.tok";

// Upper bound applied to both the input file size and the decompressed size.
// Classic tellyscript limited to 64K IIRC; no idea about MSN.
const int MAX_SCRIPT_LEN = 128 * 1024;

// Numeric constants below this value are printed in decimal, the rest in hex.
const int HEX_THRESHOLD = 512;
// CRC-32 lookup table, taken from
// http://web.mit.edu/freebsd/head/sys/libkern/crc32.c
//
//  COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
//  code or tables extracted from it, as desired without restriction.
const uint32_t crc32_tab[] = {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
    0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
    0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
    0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
    0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
    0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
    0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
    0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
    0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
    0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
    0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
    0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
    0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
    0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
    0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
    0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
    0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
    0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

//
// Compute the table-driven CRC-32 of "size" bytes starting at "buf".
//
// The accumulator starts at all-ones.  The usual final inversion
// ("crc ^ ~0U") is deliberately left out: WebTV didn't appear to
// post-condition the result, so the raw accumulator is returned.
//
uint32_t crc32x(const uint8_t* buf, size_t size) {
    uint32_t acc = ~0U;
    for (size_t idx = 0; idx < size; idx++) {
        const uint32_t slot = (acc ^ buf[idx]) & 0xFF;
        acc = (acc >> 8) ^ crc32_tab[slot];
    }
    return acc;
}
//
// Extract a 32-bit big-endian value, and advance 4 bytes.
//
// pData is an in/out cursor: four bytes are read from *pData and *pData is
// then moved forward by four.  No bounds checking is done here; the caller
// must ensure that four bytes are available.
//
static uint32_t Next4BE(const uint8_t** pData) {
    const uint8_t* data = *pData;
    // Widen each byte before shifting.  A uint8_t operand is promoted to
    // (signed) int, so "data[0] << 24" would shift into the sign bit for any
    // byte >= 0x80.
    uint32_t result = ((uint32_t)data[0] << 24) | ((uint32_t)data[1] << 16)
                    | ((uint32_t)data[2] << 8) | (uint32_t)data[3];
    *pData = data + 4;
    return result;
}
// | |
// Read entire file into memory. | |
// | |
// Returns buffer with data and length, or NULL on failure. | |
// | |
uint8_t* ReadFile(FILE* fp, size_t* pLength) { | |
if (fseek(fp, 0, SEEK_END) != 0) { | |
fprintf(stderr, "ERROR: unable to seek to end of input file\n"); | |
return NULL; | |
} | |
long length = ftell(fp); | |
rewind(fp); | |
if (length < 16 || length > MAX_SCRIPT_LEN) { | |
fprintf(stderr, "ERROR: bad input file length %ld\n", length); | |
return NULL; | |
} | |
uint8_t* buf = new uint8_t[length]; | |
size_t actual = fread(buf, 1, length, fp); | |
if (actual != (size_t) length) { | |
fprintf(stderr, "ERROR: unable to read entire input file\n"); | |
return NULL; | |
} | |
*pLength = (size_t)length; | |
return buf; | |
} | |
//
// Write "length" bytes from "data" to "fp" with a single fwrite() call.
//
// Returns 0 if every byte was written.  On a short write, reports the
// problem on stderr and returns 1.
//
int WriteBuffer(FILE* fp, void* data, size_t length) {
    if (fwrite(data, 1, length, fp) == length) {
        return 0;
    }
    fprintf(stderr, "ERROR: unable to write output file\n");
    return 1;
}
//
// Decompress LZSS.
//
// Based on Haruhiko Okumura's classic code.
//
// The stream is a series of flag bytes, each followed by up to eight items.
// Flag bits are consumed LSB first: a 1 bit means the next input byte is a
// literal, a 0 bit means the next two bytes encode a back-reference (12-bit
// distance-minus-one, 4-bit length-minus-three).
//
//  inBuf     - compressed data
//  compLen   - number of readable bytes at inBuf; never read past this
//  decompLen - exact number of bytes the stream is expected to produce
//
// Returns decoded data on success, NULL on failure (input exhausted,
// back-reference before the start of the output, or length mismatch).  The
// buffer is allocated with new[], so it must be released with delete[].
//
uint8_t* DecompressLzss(const uint8_t* inBuf, size_t compLen, size_t decompLen) {
    const int THRESHOLD = 2;
    const int MAX_MATCH = 16 + THRESHOLD;
    const uint8_t* const inEnd = inBuf + compLen;

    // Over-allocate so a match that starts just short of the end cannot
    // write past the allocation; the final length is verified below.
    uint8_t* const outStart = new uint8_t[decompLen + MAX_MATCH];
    uint8_t* outBuf = outStart;
    unsigned int flags = 0;
    bool truncated = false;

    while (outBuf - outStart < (ptrdiff_t)decompLen) {
        if (((flags >>= 1) & 256) == 0) {
            if (inBuf == inEnd) {
                truncated = true;
                break;
            }
            // The high byte is a shift-down counter: once eight flag bits
            // have been consumed, bit 8 reads as zero and we reload.
            flags = (*inBuf++) | 0xff00;
        }
        if (flags & 1) {
            // Literal byte.
            if (inBuf == inEnd) {
                truncated = true;
                break;
            }
            *outBuf++ = *inBuf++;
        } else {
            // Back-reference: two bytes.
            if (inEnd - inBuf < 2) {
                truncated = true;
                break;
            }
            int i = *inBuf++;
            int j = *inBuf++;
            i |= ((j & 0xf0) << 4);
            j = (j & 0x0f) + THRESHOLD;

            // The source byte is at outBuf - (i+1), so i must be strictly
            // less than the number of bytes produced so far.  The distance
            // is fixed while the output grows, so one check covers the
            // whole match.
            const ptrdiff_t produced = outBuf - outStart;
            if (i >= produced) {
                fprintf(stderr, "ERROR: bad LZSS pos %d at %ld\n",
                    i, (long)produced);
                delete[] outStart;
                return NULL;
            }
            // Copy j+1 bytes one at a time; source and destination may
            // overlap, which is how runs are encoded.
            for (int k = 0; k <= j; k++) {
                *outBuf = *(outBuf - (i + 1));
                outBuf++;
            }
        }
    }

    if (truncated) {
        fprintf(stderr, "ERROR: LZSS input exhausted (%zu bytes) at %ld\n",
            compLen, (long)(outBuf - outStart));
        delete[] outStart;
        return NULL;
    }

    // Make sure we stopped where we expected to.
    if (outBuf - outStart != (ptrdiff_t)decompLen) {
        fprintf(stderr, "ERROR: bad decomp len (%td vs %zu)\n",
            outBuf - outStart, decompLen);
        delete[] outStart;
        return NULL;
    }
    return outStart;
}
// | |
// Output a null-terminated numeric constant. | |
// | |
// Constants are always positive. Negative values are prefixed with a | |
// unary '-' operator. (Negative 32-bit values are still possible, but | |
// they seem intended as large positive values, e.g. fixed addresses in | |
// the ROM.) | |
// | |
void OutputNumConst(const uint8_t** pData, size_t len, FILE* outfp) { | |
const uint8_t* data = *pData; | |
// 0x30 - 39 are '0' - '9' | |
// 0x3a - 41 are 'A' - 'F' | |
int val = 0; | |
uint8_t ch; | |
while ((ch = *data++) != 0 && len > 0) { | |
int digit = ch - '0'; | |
if (digit < 0 || digit > 15) { | |
fprintf(stderr, "WARNING: malformed digit 0x%02x (%d)\n", | |
ch, digit); | |
} | |
val = (val << 4) + digit; | |
} | |
// Arbitrary hex vs. decimal threshold. | |
if (val >= 0 && val < HEX_THRESHOLD) { | |
fprintf(outfp, "%d", val); | |
if (false) { | |
// show character value; not all that useful | |
if (val >= 0x20 && val < 0x7f) { | |
fprintf(outfp, "/*%c*/", val); | |
} | |
} | |
} else { | |
fprintf(outfp, "0x%x", val); | |
if (true) { | |
// format 32-bit alphanumeric constant | |
char cstr[5]; | |
bool isConst = true; | |
for (int i = 0; i < 4; i++) { | |
uint8_t chk = (val >> ((3 - i) * 8)) & 0xff; | |
cstr[i] = chk; | |
if (chk < 'A' || chk > 'Z') { | |
isConst = false; | |
} | |
} | |
cstr[4] = '\0'; | |
if (isConst) { | |
fprintf(outfp, "/*%s*/", cstr); | |
} | |
} | |
} | |
*pData = data; | |
} | |
//
// Output a null-terminated string constant.
//
// The string is written between double quotes.  A carriage return (0x0d) is
// written as the two characters "\r"; every other byte is copied as-is.
//
//  pData - in/out cursor; on return it points just past the terminating
//          null, or exactly "len" bytes further on if none was found
//  len   - number of readable bytes at *pData
//  outfp - destination for the quoted string
//
void OutputStrConst(const uint8_t** pData, size_t len, FILE* outfp) {
    const uint8_t* data = *pData;

    fputc('"', outfp);
    // Check the bound before each read so that an unterminated string can't
    // pull the cursor past the end of the buffer.
    while (len > 0) {
        const uint8_t ch = *data++;
        len--;
        if (ch == 0) {
            break;
        }
        // TODO: escape '"', some others?
        if (ch == 0x0d) {
            // don't output CR as a literal
            fputs("\\r", outfp);
        } else {
            fputc(ch, outfp);
        }
    }
    fputc('"', outfp);

    *pData = data;
}
//
// Output a null-terminated identifier.
//
// The bytes are copied to the output unchanged.
//
//  pData - in/out cursor; on return it points just past the terminating
//          null, or exactly "len" bytes further on if none was found
//  len   - number of readable bytes at *pData
//  outfp - destination for the identifier text
//
void OutputIdentifier(const uint8_t** pData, size_t len, FILE* outfp) {
    const uint8_t* data = *pData;

    // Check the bound before each read so that an unterminated identifier
    // can't pull the cursor past the end of the buffer.
    while (len > 0) {
        const uint8_t ch = *data++;
        len--;
        if (ch == 0) {
            break;
        }
        fputc(ch, outfp);
    }

    *pData = data;
}
// | |
// Convert tokenized data stream to C-like form. | |
// | |
int DetokenizeData(const uint8_t* decompData, size_t decompLen, FILE* outfp) { | |
// | |
// Most things are either output literally or with a trivial mapping, | |
// but a few things are prefixes for additional data: | |
// | |
// 0x43 'C' - null-terminated numeric constant. Value is in hex, with | |
// [0-9] for the decimal digits and [:-?] for the hex digits, | |
// so you can trivially subtract '0' to get the digit. | |
// 0x49 'I' - null-terminated identifier (e.g. variable name). | |
// 0x53 'S' - null-terminated string constant. | |
// | |
// Script ends with 0xff 0xff 0x00? | |
// | |
// Except for the end marker, all bytes in the file are <= 0x7f. | |
// | |
// Tokens (naming slightly inconsistent... it's fine). | |
const uint8_t TOK_EXCLAM = 0x21; // '!' | |
const uint8_t TOK_PERCENT = 0x25; // '%' | |
const uint8_t TOK_AND_AND = 0x26; // '&' | |
const uint8_t TOK_PAREN_OPEN = 0x28; // '(' | |
const uint8_t TOK_PAREN_CLOSE = 0x29; // ')' | |
const uint8_t TOK_ASTERISK = 0x2a; // '*' | |
const uint8_t TOK_PLUS = 0x2b; // '+' | |
const uint8_t TOK_COMMA = 0x2c; // ',' | |
const uint8_t TOK_MINUS = 0x2d; // '-' | |
const uint8_t TOK_SLASH = 0x2f; // '/' | |
const uint8_t TOK_SEMICOLON = 0x3b; // ';' | |
const uint8_t TOK_LF_ANGLE = 0x3c; // '<' | |
const uint8_t TOK_EQUAL = 0x3d; // '=' | |
const uint8_t TOK_RT_ANGLE = 0x3e; // '>' | |
const uint8_t TOK_ADDR = 0x40; // '@' | |
const uint8_t TOK_NUM_CONST = 0x43; // 'C' | |
const uint8_t TOK_MINUSMINUS = 0x44; // 'D' | |
const uint8_t TOK_EQUAL_EQUAL = 0x45; // 'E' | |
const uint8_t TOK_GT_EQUAL = 0x47; // 'G' | |
const uint8_t TOK_IDENT = 0x49; // 'I' | |
const uint8_t TOK_LT_EQUAL = 0x4c; // 'L' | |
const uint8_t TOK_NEQUAL = 0x4e; // 'N' | |
const uint8_t TOK_PLUSPLUS = 0x50; // 'P' | |
const uint8_t TOK_STR_CONST = 0x53; // 'S' | |
const uint8_t TOK_BRACK_OPEN = 0x5b; // '[' | |
const uint8_t TOK_BRACK_CLOSE = 0x5d; // ']' | |
const uint8_t TOK_BREAK = 0x62; // 'b' | |
const uint8_t TOK_CHAR = 0x63; // 'c' | |
const uint8_t TOK_LONG = 0x6c; // 'l' | |
const uint8_t TOK_ELSE = 0x65; // 'e' | |
const uint8_t TOK_FOR = 0x66; // 'f' | |
const uint8_t TOK_IF = 0x69; // 'i' | |
const uint8_t TOK_RETURN = 0x72; // 'r' | |
const uint8_t TOK_WHILE = 0x77; // 'w' | |
const uint8_t TOK_CURLY_OPEN = 0x7b; // '{' | |
const uint8_t TOK_OR_OR = 0x7c; // '|' | |
const uint8_t TOK_CURLY_CLOSE = 0x7d; // '}' | |
const uint8_t TOK_EOL = 0x7f; // DEL | |
const uint8_t TOK_EOF = 0xff; | |
const uint8_t* decompStart = decompData; | |
int indent = 0; | |
uint8_t token; | |
while ((token = *decompData++) != TOK_EOF) { | |
size_t remLen = decompLen - (decompData - decompStart); | |
if (remLen < 0) { | |
fprintf(stderr, "ERROR: ran off end of script\n"); | |
return 1; | |
} | |
switch (token) { | |
case TOK_BREAK: | |
fputs("break", outfp); | |
break; | |
case TOK_FOR: | |
fputs("for ", outfp); | |
break; | |
case TOK_CHAR: | |
fputs("char ", outfp); | |
break; | |
case TOK_ELSE: | |
if (decompData >= decompStart + 2 && decompData[-2] == TOK_CURLY_CLOSE) { | |
fputs(" else ", outfp); | |
} else { | |
fputs("else ", outfp); | |
} | |
break; | |
case TOK_IF: | |
fputs("if ", outfp); | |
break; | |
case TOK_LONG: | |
fputs("int ", outfp); | |
break; | |
case TOK_RETURN: | |
fputs("return ", outfp); | |
break; | |
case TOK_WHILE: | |
fputs("while ", outfp); | |
break; | |
case TOK_MINUSMINUS: | |
fputs("--", outfp); | |
break; | |
case TOK_PLUSPLUS: | |
fputs("++", outfp); | |
break; | |
case TOK_NEQUAL: | |
fputs(" != ", outfp); | |
break; | |
case TOK_EQUAL_EQUAL: | |
fputs(" == ", outfp); | |
break; | |
case TOK_LT_EQUAL: | |
fputs(" <= ", outfp); | |
break; | |
case TOK_GT_EQUAL: | |
fputs(" >= ", outfp); | |
break; | |
case TOK_OR_OR: | |
fputs(" || ", outfp); | |
break; | |
case TOK_AND_AND: | |
fputs(" && ", outfp); | |
break; | |
case TOK_ADDR: | |
fputs("&", outfp); | |
break; | |
case TOK_NUM_CONST: | |
OutputNumConst(&decompData, remLen, outfp); | |
break; | |
case TOK_STR_CONST: | |
OutputStrConst(&decompData, remLen, outfp); | |
break; | |
case TOK_IDENT: | |
OutputIdentifier(&decompData, remLen, outfp); | |
break; | |
case TOK_EXCLAM: | |
case TOK_PAREN_OPEN: | |
case TOK_PAREN_CLOSE: | |
case TOK_ASTERISK: // TODO: add spaces when used for mult (but not deref) | |
case TOK_SEMICOLON: | |
case TOK_BRACK_OPEN: | |
case TOK_BRACK_CLOSE: | |
// copy literal | |
fputc(token, outfp); | |
break; | |
case TOK_COMMA: | |
// copy literal, add space | |
fprintf(outfp, "%c ", token); | |
break; | |
case TOK_EQUAL: | |
case TOK_PLUS: | |
case TOK_MINUS: // TODO: space after looks wrong for unary minus | |
case TOK_SLASH: | |
case TOK_PERCENT: | |
case TOK_LF_ANGLE: | |
case TOK_RT_ANGLE: | |
// copy literal, add spaces before and after | |
fprintf(outfp, " %c ", token); | |
break; | |
case TOK_CURLY_OPEN: | |
indent++; | |
if (decompData >= decompStart + 2 && | |
decompData[-2] == TOK_PAREN_CLOSE) { | |
// insert space in things like "if (blah){" | |
fprintf(outfp, " %c", token); | |
} else { | |
fputc(token, outfp); | |
} | |
break; | |
case TOK_CURLY_CLOSE: | |
indent--; | |
fputc(token, outfp); | |
break; | |
case TOK_EOL: | |
fprintf(outfp, "\n"); | |
if (DEBUG) { | |
// Map contents to hex dump. | |
fprintf(outfp, "%04lx ", decompData - decompStart); | |
} | |
// Synthesize indentation (4 spaces per). | |
{ | |
int spaces = indent * 4; | |
if (decompData < decompStart + decompLen && | |
*decompData == TOK_CURLY_CLOSE) { | |
// Un-indent closing braces at the start of a line. | |
spaces -= 4; | |
} | |
for (int i = 0; i < spaces; i++) { | |
fputc(' ', outfp); | |
} | |
} | |
break; | |
default: | |
fprintf(outfp, "?%02x?", token); | |
break; | |
} | |
} | |
if (indent != 0) { | |
// Check for imbalanced curly braces; shouldn't happen in valid script. | |
fprintf(stderr, "WARNING: indentation level didn't zero out\n"); | |
} | |
// Files seem to end in "0xff 0xff 0x00", and we stop at the first 0xff. | |
if (decompData - decompStart < (int)decompLen - 2) { | |
// early EOF? | |
fprintf(stderr, "WARNING: only consumed %ld of %zd bytes\n", | |
decompData - decompStart, decompLen); | |
} | |
return 0; | |
} | |
// | |
// Convert a script from compressed + tokenized form to text. | |
// | |
int ProcessScript(const uint8_t* data, size_t length, FILE* outfp) { | |
const uint8_t* start = data; | |
// Not entirely sure what these are, other than the two lengths. The | |
// hashes are likely CRC-32, but I haven't found a computation | |
// that yields a match on the compressed or uncompressed data. | |
uint32_t magic = Next4BE(&data); // +00 magic number | |
uint32_t vers1 = Next4BE(&data); // +04 version number?? | |
uint32_t vers2 = Next4BE(&data); // +08 version number?? | |
uint32_t hash1 = Next4BE(&data); // +0c header checksum? | |
uint32_t timestamp = Next4BE(&data); // +10 32-bit UNIX timestamp | |
uint32_t compLen = Next4BE(&data); // +14 length of compressed data | |
uint32_t decompLen = Next4BE(&data); // +18 length after decompression | |
uint32_t decompCrc = Next4BE(&data); // +1c CRC on decompressed data | |
uint32_t thing1 = Next4BE(&data); // +20 one-byte flags? padding? | |
size_t headerLen = data - start; | |
// Dump the header. | |
printf("Input file is %zd bytes\n", length); | |
printf("Header contents (%zd bytes):\n", headerLen); | |
printf(" Magic: %c%c%c%c\n", | |
(char)((magic >> 24) & 0xff), | |
(char)((magic >> 16) & 0xff), | |
(char)((magic >> 8) & 0xff), | |
(char)(magic & 0xff)); | |
printf(" Version1: %d\n", vers1); | |
printf(" Version2: %d\n", vers2); | |
printf(" Hash1: %08x\n", hash1); | |
time_t when = (time_t) timestamp; | |
printf(" Timestamp: %08x - %s", timestamp, ctime(&when)); | |
printf(" CompLen: %u\n", compLen); | |
printf(" DecompLen: %u\n", decompLen); | |
printf(" DecompCRC: %08x\n", decompCrc); | |
printf(" Thing1: %08x\n", thing1); | |
if (decompLen < 1 || decompLen > MAX_SCRIPT_LEN) { | |
fprintf(stderr, "ERROR: invalid decompressed length\n"); | |
return 1; | |
} | |
// See if compressed length matches file length. | |
if (compLen + headerLen > length) { | |
// file truncated? | |
fprintf(stderr, "ERROR: file too short (%zd) for compLen=%u\n", | |
length, compLen); | |
return 1; | |
} else if (compLen + headerLen != length) { | |
fprintf(stderr, | |
"WARNING: comp len expected to be %u, file holds %zd\n", | |
compLen, length - headerLen); | |
// keep going | |
} | |
#if 0 | |
// Try a CRC on all header fields that follow hash1. | |
uint32_t headerCheck = crc32x(start + 16, headerLen - 16); | |
printf("header CRC: 0x%08x\n", headerCheck); | |
// Try to check the compressed data CRC. Not sure if it's a CRC-32 or | |
// what bytes the hash includes, so try a few. | |
for (int i = 16; i <= 36; i += 4) { | |
printf("computed CRC(+%d %ld)=0x%08x\n", i, length - i, | |
crc32x(start + i, length - i)); | |
} | |
#endif | |
// Decompress to tokenized form. | |
uint8_t* decompData = DecompressLzss(data, compLen, decompLen); | |
if (decompData == NULL) { | |
fprintf(stderr, "ERROR: LZSS decompression failed\n"); | |
return 1; | |
} | |
// Check CRC on decompressed data. | |
uint32_t calcCrc = crc32x(decompData, decompLen); | |
printf("Calculated decomp CRC=0x%08x (%s)\n", calcCrc, | |
calcCrc == decompCrc ? "match" : "MISMATCH"); | |
// Dump tokenized form, for debugging. | |
if (DEBUG) { | |
printf("Writing decompressed data to %s\n", DEBUG_TOK_FILENAME); | |
FILE* tokfp = fopen(DEBUG_TOK_FILENAME, "wb"); | |
if (tokfp != NULL) { | |
WriteBuffer(tokfp, decompData, decompLen); | |
} | |
fclose(tokfp); | |
} | |
// Convert tokenized to text. | |
int result = DetokenizeData(decompData, decompLen, outfp); | |
delete[] decompData; | |
return result; | |
} | |
// | |
// Main entry point. | |
// | |
int main(int argc, char** argv) { | |
if (argc != 3) { | |
fprintf(stderr, "Usage: detok <infile.tok> <outfile.txt>\n"); | |
return 2; | |
} | |
FILE* infp = fopen(argv[1], "rb"); | |
if (infp == NULL) { | |
perror("Unable to open input file"); | |
return 1; | |
} | |
size_t length; | |
uint8_t* data = ReadFile(infp, &length); | |
fclose(infp); | |
if (data == NULL) { | |
return 1; | |
} | |
FILE* outfp = fopen(argv[2], "w"); | |
if (outfp == NULL) { | |
perror("Unable to open output file"); | |
return 1; | |
} | |
int result = ProcessScript(data, length, outfp); | |
fclose(outfp); | |
delete[] data; | |
if (result == 0) { | |
printf("Success\n"); | |
} | |
return result; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment