fadden/detok.cpp

## detok.cpp
/*
 * WebTV tellyscript decompressor and de-tokenizer.
 *
 * Usage: detok input.tok output.txt
 *
 * The output file will be overwritten if it exists.
 *
 * **************************************************************************
 *
 * Tellyscript is a C-like language invented by Andy Rubin for use at WebTV.
 * The language is missing a few C constructs that are difficult to handle
 * with a simple parser, such as "switch".  Every multi-character keyword
 * and operator is converted to a single-byte token, which makes it compact
 * and easy to parse.
 *
 * When generating tellyscripts in the WebTV service, the source files are
 * assembled from parts, minified (function and variable names are remapped
 * to 1- or 2-letter names), tokenized, and then compressed with LZSS
 * before being delivered to the device.  The tokenization process strips
 * away comments and most whitespace, but leaves EOL markers intact so that
 * error messages can report the correct line number.
 *
 * This tool decompresses the data and reverses the tokenization process.
 * Whitespace is reintroduced to make the code more readable.
 *
 * **************************************************************************
 *
 * v1.0.0  2021/06/19  fadden
 *  - Feels right, but there's no way to know for sure.
 * v1.1.0  2021/06/29  fadden
 *  - Identified timestamp and decompressed data CRC fields.
 *
 * **************************************************************************
 *
 * This was developed by reverse-engineering the file format, based on some
 * vague 25-year-old memories.  I do not have access to the original sources.
 * There was no equivalent tool at WebTV, because the only thing that ever
 * needed to parse these files was the box itself.
 *
 * This is free and unencumbered software released into the public domain.
 * For details, visit https://unlicense.org/.
 */
#include <stdlib.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

// Enable additional debugging output and generation of the "debug.tok" file.
// When set, each detokenized line is prefixed with its hex offset in the
// token stream, and the decompressed (still tokenized) data is written out.
const int DEBUG = false;

// Output file that receives the decompressed token stream when DEBUG is set.
const char DEBUG_TOK_FILENAME[] = "debug.tok";

// Upper bound, in bytes, applied to both the input file size and the
// decompressed length declared in the file header.
// Classic tellyscript limited to 64K IIRC; no idea about MSN.
const int MAX_SCRIPT_LEN = 128 * 1024;
// Choose between hex and decimal numeric constant formatting: non-negative
// values below this threshold are printed in decimal, all others in hex.
const int HEX_THRESHOLD = 512;

// from http://web.mit.edu/freebsd/head/sys/libkern/crc32.c
//
// COPYRIGHT (C) 1986 Gary S. Brown.  You may use this program, or
// code or tables extracted from it, as desired without restriction.
//
// 256-entry lookup table for the byte-at-a-time CRC in crc32x().  It is
// indexed with the low 8 bits of (running CRC XOR next input byte).
const uint32_t crc32_tab[] = {
    0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
    0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
    0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
    0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
    0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
    0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
    0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
    0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
    0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
    0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
    0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
    0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
    0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
    0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
    0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
    0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
    0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
    0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
    0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
    0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
    0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
    0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
    0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
    0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
    0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
    0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
    0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
    0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
    0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
    0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
    0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
    0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
    0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
    0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
    0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
    0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
    0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
    0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
    0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
    0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
    0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
    0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
    0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
};

//
// Compute a table-driven CRC over "size" bytes starting at "buf".
//
// The register is seeded with all ones.  Unlike the usual CRC-32 there is
// no final inversion: WebTV doesn't appear to post-condition the result,
// so the raw register value is returned.
//
uint32_t crc32x(const uint8_t* buf, size_t size) {
    uint32_t reg = ~0U;

    for (size_t idx = 0; idx < size; idx++) {
        uint32_t slot = (reg ^ buf[idx]) & 0xFF;
        reg = (reg >> 8) ^ crc32_tab[slot];
    }
    return reg;
}


//
// Extract a 32-bit big-endian value, and advance 4 bytes.
//
// pData is the caller's read cursor.  The four bytes at *pData are combined
// most-significant first and *pData is moved past them.  The caller must
// ensure at least four bytes are available.
//
static uint32_t Next4BE(const uint8_t** pData) {
    const uint8_t* data = *pData;
    // Widen before shifting: a uint8_t promotes to signed int, and shifting
    // a byte >= 0x80 left by 24 would overflow into the sign bit.
    uint32_t result = ((uint32_t)data[0] << 24) | ((uint32_t)data[1] << 16)
        | ((uint32_t)data[2] << 8) | (uint32_t)data[3];
    *pData = data + 4;
    return result;
}

//
// Read entire file into memory.
//
// fp must be an open, seekable stream.  Files shorter than 16 bytes or
// longer than MAX_SCRIPT_LEN are rejected.
//
// Returns buffer with data and length, or NULL on failure.  The buffer is
// allocated with new[]; the caller releases it with delete[].
//
uint8_t* ReadFile(FILE* fp, size_t* pLength) {
    if (fseek(fp, 0, SEEK_END) != 0) {
        fprintf(stderr, "ERROR: unable to seek to end of input file\n");
        return NULL;
    }
    // ftell() reports failure as -1, which the range check below rejects.
    long length = ftell(fp);
    rewind(fp);
    if (length < 16 || length > MAX_SCRIPT_LEN) {
        fprintf(stderr, "ERROR: bad input file length %ld\n", length);
        return NULL;
    }

    uint8_t* buf = new uint8_t[length];

    size_t actual = fread(buf, 1, (size_t)length, fp);
    if (actual != (size_t)length) {
        fprintf(stderr, "ERROR: unable to read entire input file\n");
        delete[] buf;       // don't leak the buffer on a short read
        return NULL;
    }

    *pLength = (size_t)length;
    return buf;
}

//
// Write a binary blob of "length" bytes to an open stream.
//
// A short write is reported on stderr.
//
// Returns 0 on success, 1 on failure.
//
int WriteBuffer(FILE* fp, void* data, size_t length) {
    if (fwrite(data, 1, length, fp) == length) {
        return 0;
    }

    fprintf(stderr, "ERROR: unable to write output file\n");
    return 1;
}

//
// Decompress LZSS.
//
// Based on Haruhiko Okumura's classic code.
//
// Each flag byte describes the next eight items, least-significant bit
// first: a 1 bit is a literal byte, a 0 bit is a two-byte back-reference
// holding a 12-bit distance (stored as distance-1) and a 4-bit length
// (stored as length-3).
//
// inBuf/compLen describe the compressed data; reads never go past
// inBuf + compLen.  decompLen is the exact number of bytes expected.
//
// Returns decoded data on success, NULL on failure.  The buffer is
// allocated with new[]; the caller releases it with delete[].
//
uint8_t* DecompressLzss(const uint8_t* inBuf, size_t compLen, size_t decompLen) {
    const int THRESHOLD = 2;
    const int MAX_MATCH = 16 + THRESHOLD;
    const uint8_t* inEnd = inBuf + compLen;
    unsigned int flags = 0;

    // Over-allocate so we can use a simpler buffer overrun check: the last
    // match may run up to MAX_MATCH - 1 bytes past decompLen.
    uint8_t* outStart = new uint8_t[decompLen + MAX_MATCH];
    size_t outPos = 0;
    bool truncated = false;

    while (outPos < decompLen) {
        if (((flags >>= 1) & 256) == 0) {
            if (inBuf == inEnd) {
                truncated = true;
                break;
            }
            flags = (*inBuf++) | 0xff00;    /* uses higher byte cleverly */
        }                                   /* to count eight */
        if (flags & 1) {
            if (inBuf == inEnd) {
                truncated = true;
                break;
            }
            outStart[outPos++] = *inBuf++;
        } else {
            if (inEnd - inBuf < 2) {
                truncated = true;
                break;
            }
            unsigned int pos = *inBuf++;
            unsigned int lenBits = *inBuf++;
            pos |= ((lenBits & 0xf0) << 4);
            unsigned int last = (lenBits & 0x0f) + THRESHOLD;

            // The source byte sits pos+1 bytes back, so pos must be
            // strictly less than the amount of output produced so far.
            if (pos >= outPos) {
                fprintf(stderr, "ERROR: bad LZSS pos %u at %zu\n",
                    pos, outPos);
                delete[] outStart;
                return NULL;
            }
            // Copy one byte at a time; source and destination may overlap.
            for (unsigned int k = 0; k <= last; k++) {
                outStart[outPos] = outStart[outPos - (pos + 1)];
                outPos++;
            }
        }
    }

    if (truncated) {
        fprintf(stderr, "ERROR: compressed data ended early at output %zu\n",
            outPos);
        delete[] outStart;
        return NULL;
    }

    // Make sure we stopped where we expected to.
    if (outPos != decompLen) {
        fprintf(stderr, "ERROR: bad decomp len (%zu vs %zu)\n",
            outPos, decompLen);
        delete[] outStart;
        return NULL;
    }

    return outStart;
}

//
// Output a null-terminated numeric constant.
//
// Constants are always positive.  Negative values are prefixed with a
// unary '-' operator.  (Negative 32-bit values are still possible, but
// they seem intended as large positive values, e.g. fixed addresses in
// the ROM.)
//
// pData is the read cursor and len the number of bytes available at it.
// On return *pData points past the terminator, or past the last available
// byte if no terminator was found.
//
void OutputNumConst(const uint8_t** pData, size_t len, FILE* outfp) {
    const uint8_t* data = *pData;

    // 0x30 - 0x39 are '0' - '9'
    // 0x3a - 0x3f are ':' - '?', which stand in for hex digits A - F

    // Accumulate unsigned so values with the high bit set don't overflow a
    // signed int.  The length is tested before each byte is fetched so we
    // never read past the available data.
    uint32_t val = 0;
    while (len > 0) {
        uint8_t ch = *data++;
        len--;
        if (ch == 0) {
            break;
        }
        int digit = ch - '0';
        if (digit < 0 || digit > 15) {
            fprintf(stderr, "WARNING: malformed digit 0x%02x (%d)\n",
                ch, digit);
        }
        val = (val << 4) + (uint32_t)digit;
    }

    // Arbitrary hex vs. decimal threshold.
    if (val < (uint32_t)HEX_THRESHOLD) {
        fprintf(outfp, "%d", (int)val);
        if (false) {
            // show character value; not all that useful
            if (val >= 0x20 && val < 0x7f) {
                fprintf(outfp, "/*%c*/", (int)val);
            }
        }
    } else {
        fprintf(outfp, "0x%x", val);
        if (true) {
            // format 32-bit alphanumeric constant: if all four bytes are
            // upper-case letters, show them in a trailing comment
            char cstr[5];
            bool isConst = true;
            for (int i = 0; i < 4; i++) {
                uint8_t chk = (val >> ((3 - i) * 8)) & 0xff;
                cstr[i] = (char)chk;
                if (chk < 'A' || chk > 'Z') {
                    isConst = false;
                }
            }
            cstr[4] = '\0';
            if (isConst) {
                fprintf(outfp, "/*%s*/", cstr);
            }
        }
    }

    *pData = data;
}

//
// Output a null-terminated string constant.
//
// The text is wrapped in double quotes; a carriage return is written as
// the two-character escape \r, everything else is copied unchanged.
//
// pData is the read cursor and len the number of bytes available at it.
// On return *pData points past the terminator, or past the last available
// byte if no terminator was found.
//
void OutputStrConst(const uint8_t** pData, size_t len, FILE* outfp) {
    const uint8_t* data = *pData;

    fputc('"', outfp);
    // Test the remaining length before fetching, so that we never touch
    // the byte beyond the available data.
    while (len > 0) {
        uint8_t ch = *data++;
        len--;
        if (ch == 0) {
            break;
        }
        // TODO: escape '"', some others?
        if (ch == 0x0d) {
            // don't output CR as a literal
            fputs("\\r", outfp);
        } else {
            fputc(ch, outfp);
        }
    }
    fputc('"', outfp);

    *pData = data;
}

//
// Output a null-terminated identifier.
//
// The bytes are copied to outfp unchanged.  pData is the read cursor and
// len the number of bytes available at it.  On return *pData points past
// the terminator, or past the last available byte if none was found.
//
void OutputIdentifier(const uint8_t** pData, size_t len, FILE* outfp) {
    const uint8_t* data = *pData;

    // Test the remaining length before fetching, so that we never touch
    // the byte beyond the available data.
    while (len > 0) {
        uint8_t ch = *data++;
        len--;
        if (ch == 0) {
            break;
        }
        fputc(ch, outfp);
    }

    *pData = data;
}

//
// Convert tokenized data stream to C-like form.
//
// Reads tokens from decompData (decompLen bytes) up to the first 0xff end
// marker and writes the reconstructed text to outfp, synthesizing spacing
// and indentation along the way.
//
// Returns 0 on success, 1 if the data ends before an end marker is seen.
//
int DetokenizeData(const uint8_t* decompData, size_t decompLen, FILE* outfp) {
    //
    // Most things are either output literally or with a trivial mapping,
    // but a few things are prefixes for additional data:
    //
    //  0x43 'C' - null-terminated numeric constant.  Value is in hex, with
    //             [0-9] for the decimal digits and [:-?] for the hex digits,
    //             so you can trivially subtract '0' to get the digit.
    //  0x49 'I' - null-terminated identifier (e.g. variable name).
    //  0x53 'S' - null-terminated string constant.
    //
    // Script ends with 0xff 0xff 0x00?
    //
    // Except for the end marker, all bytes in the file are <= 0x7f.
    //

    // Tokens (naming slightly inconsistent... it's fine).
    const uint8_t TOK_EXCLAM = 0x21;        // '!'
    const uint8_t TOK_PERCENT = 0x25;       // '%'
    const uint8_t TOK_AND_AND = 0x26;       // '&'
    const uint8_t TOK_PAREN_OPEN = 0x28;    // '('
    const uint8_t TOK_PAREN_CLOSE = 0x29;   // ')'
    const uint8_t TOK_ASTERISK = 0x2a;      // '*'
    const uint8_t TOK_PLUS = 0x2b;          // '+'
    const uint8_t TOK_COMMA = 0x2c;         // ','
    const uint8_t TOK_MINUS = 0x2d;         // '-'
    const uint8_t TOK_SLASH = 0x2f;         // '/'
    const uint8_t TOK_SEMICOLON = 0x3b;     // ';'
    const uint8_t TOK_LF_ANGLE = 0x3c;      // '<'
    const uint8_t TOK_EQUAL = 0x3d;         // '='
    const uint8_t TOK_RT_ANGLE = 0x3e;      // '>'
    const uint8_t TOK_ADDR = 0x40;          // '@'
    const uint8_t TOK_NUM_CONST = 0x43;     // 'C'
    const uint8_t TOK_MINUSMINUS = 0x44;    // 'D'
    const uint8_t TOK_EQUAL_EQUAL = 0x45;   // 'E'
    const uint8_t TOK_GT_EQUAL = 0x47;      // 'G'
    const uint8_t TOK_IDENT = 0x49;         // 'I'
    const uint8_t TOK_LT_EQUAL = 0x4c;      // 'L'
    const uint8_t TOK_NEQUAL = 0x4e;        // 'N'
    const uint8_t TOK_PLUSPLUS = 0x50;      // 'P'
    const uint8_t TOK_STR_CONST = 0x53;     // 'S'
    const uint8_t TOK_BRACK_OPEN = 0x5b;    // '['
    const uint8_t TOK_BRACK_CLOSE = 0x5d;   // ']'
    const uint8_t TOK_BREAK = 0x62;         // 'b'
    const uint8_t TOK_CHAR = 0x63;          // 'c'
    const uint8_t TOK_ELSE = 0x65;          // 'e'
    const uint8_t TOK_FOR = 0x66;           // 'f'
    const uint8_t TOK_IF = 0x69;            // 'i'
    const uint8_t TOK_LONG = 0x6c;          // 'l'
    const uint8_t TOK_RETURN = 0x72;        // 'r'
    const uint8_t TOK_WHILE = 0x77;         // 'w'
    const uint8_t TOK_CURLY_OPEN = 0x7b;    // '{'
    const uint8_t TOK_OR_OR = 0x7c;         // '|'
    const uint8_t TOK_CURLY_CLOSE = 0x7d;   // '}'
    const uint8_t TOK_EOL = 0x7f;           // DEL

    const uint8_t TOK_EOF = 0xff;

    const uint8_t* decompStart = decompData;
    const uint8_t* decompEnd = decompData + decompLen;

    int indent = 0;
    bool sawEof = false;
    // Bound the scan by the buffer length, so that data without an end
    // marker is reported instead of being read past.
    while (decompData < decompEnd) {
        uint8_t token = *decompData++;
        if (token == TOK_EOF) {
            sawEof = true;
            break;
        }
        // Bytes available after the token, for the variable-length items.
        size_t remLen = (size_t)(decompEnd - decompData);

        switch (token) {
        case TOK_BREAK:
            fputs("break", outfp);
            break;
        case TOK_FOR:
            fputs("for ", outfp);
            break;
        case TOK_CHAR:
            fputs("char ", outfp);
            break;
        case TOK_ELSE:
            // Separate "else" from a '}' that immediately precedes it.
            if (decompData >= decompStart + 2 &&
                    decompData[-2] == TOK_CURLY_CLOSE) {
                fputs(" else ", outfp);
            } else {
                fputs("else ", outfp);
            }
            break;
        case TOK_IF:
            fputs("if ", outfp);
            break;
        case TOK_LONG:
            fputs("int ", outfp);
            break;
        case TOK_RETURN:
            fputs("return ", outfp);
            break;
        case TOK_WHILE:
            fputs("while ", outfp);
            break;
        case TOK_MINUSMINUS:
            fputs("--", outfp);
            break;
        case TOK_PLUSPLUS:
            fputs("++", outfp);
            break;
        case TOK_NEQUAL:
            fputs(" != ", outfp);
            break;
        case TOK_EQUAL_EQUAL:
            fputs(" == ", outfp);
            break;
        case TOK_LT_EQUAL:
            fputs(" <= ", outfp);
            break;
        case TOK_GT_EQUAL:
            fputs(" >= ", outfp);
            break;
        case TOK_OR_OR:
            fputs(" || ", outfp);
            break;
        case TOK_AND_AND:
            fputs(" && ", outfp);
            break;
        case TOK_ADDR:
            fputs("&", outfp);
            break;

        case TOK_NUM_CONST:
            OutputNumConst(&decompData, remLen, outfp);
            break;
        case TOK_STR_CONST:
            OutputStrConst(&decompData, remLen, outfp);
            break;
        case TOK_IDENT:
            OutputIdentifier(&decompData, remLen, outfp);
            break;

        case TOK_EXCLAM:
        case TOK_PAREN_OPEN:
        case TOK_PAREN_CLOSE:
        case TOK_ASTERISK:  // TODO: add spaces when used for mult (but not deref)
        case TOK_SEMICOLON:
        case TOK_BRACK_OPEN:
        case TOK_BRACK_CLOSE:
            // copy literal
            fputc(token, outfp);
            break;

        case TOK_COMMA:
            // copy literal, add space
            fprintf(outfp, "%c ", token);
            break;

        case TOK_EQUAL:
        case TOK_PLUS:
        case TOK_MINUS:     // TODO: space after looks wrong for unary minus
        case TOK_SLASH:
        case TOK_PERCENT:
        case TOK_LF_ANGLE:
        case TOK_RT_ANGLE:
            // copy literal, add spaces before and after
            fprintf(outfp, " %c ", token);
            break;

        case TOK_CURLY_OPEN:
            indent++;
            if (decompData >= decompStart + 2 &&
                    decompData[-2] == TOK_PAREN_CLOSE) {
                // insert space in things like "if (blah){"
                fprintf(outfp, " %c", token);
            } else {
                fputc(token, outfp);
            }
            break;
        case TOK_CURLY_CLOSE:
            indent--;
            fputc(token, outfp);
            break;

        case TOK_EOL:
            fprintf(outfp, "\n");
            if (DEBUG) {
                // Map contents to hex dump.
                fprintf(outfp, "%04lx ",
                    (unsigned long)(decompData - decompStart));
            }
            // Synthesize indentation (4 spaces per).
            {
                int spaces = indent * 4;
                if (decompData < decompEnd &&
                        *decompData == TOK_CURLY_CLOSE) {
                    // Un-indent closing braces at the start of a line.
                    spaces -= 4;
                }
                for (int i = 0; i < spaces; i++) {
                    fputc(' ', outfp);
                }
            }
            break;

        default:
            // Unknown token: make it visible in the output.
            fprintf(outfp, "?%02x?", token);
            break;
        }
    }

    if (!sawEof) {
        fprintf(stderr, "ERROR: ran off end of script\n");
        return 1;
    }

    if (indent != 0) {
        // Check for imbalanced curly braces; shouldn't happen in valid script.
        fprintf(stderr, "WARNING: indentation level didn't zero out\n");
    }

    // Files seem to end in "0xff 0xff 0x00", and we stop at the first 0xff.
    size_t consumed = (size_t)(decompData - decompStart);
    if (consumed + 2 < decompLen) {
        // early EOF?
        fprintf(stderr, "WARNING: only consumed %zu of %zu bytes\n",
            consumed, decompLen);
    }

    return 0;
}

//
// Convert a script from compressed + tokenized form to text.
//
// data/length hold the whole input file: a header of nine big-endian 32-bit
// fields followed by the LZSS-compressed token stream.  The header is
// dumped to stdout, the body is decompressed and CRC-checked, and the
// detokenized text is written to outfp.
//
// Returns 0 on success, nonzero on failure.
//
int ProcessScript(const uint8_t* data, size_t length, FILE* outfp) {
    const size_t HEADER_LEN = 9 * 4;
    const uint8_t* start = data;

    // Don't parse header fields that aren't there.
    if (length < HEADER_LEN) {
        fprintf(stderr, "ERROR: file too short (%zu) for %zu-byte header\n",
            length, HEADER_LEN);
        return 1;
    }

    // Not entirely sure what all of these are.  The timestamp, the two
    // lengths, and the CRC on the decompressed data have been identified;
    // hash1 is likely a checksum, but no computation has matched it yet.
    uint32_t magic = Next4BE(&data);        // +00 magic number
    uint32_t vers1 = Next4BE(&data);        // +04 version number??
    uint32_t vers2 = Next4BE(&data);        // +08 version number??
    uint32_t hash1 = Next4BE(&data);        // +0c header checksum?
    uint32_t timestamp = Next4BE(&data);    // +10 32-bit UNIX timestamp
    uint32_t compLen = Next4BE(&data);      // +14 length of compressed data
    uint32_t decompLen = Next4BE(&data);    // +18 length after decompression
    uint32_t decompCrc = Next4BE(&data);    // +1c CRC on decompressed data
    uint32_t thing1 = Next4BE(&data);       // +20 one-byte flags? padding?

    size_t headerLen = (size_t)(data - start);

    // Dump the header.
    printf("Input file is %zu bytes\n", length);
    printf("Header contents (%zu bytes):\n", headerLen);
    printf("  Magic: %c%c%c%c\n",
        (char)((magic >> 24) & 0xff),
        (char)((magic >> 16) & 0xff),
        (char)((magic >> 8) & 0xff),
        (char)(magic & 0xff));
    printf("  Version1: %d\n", (int)vers1);
    printf("  Version2: %d\n", (int)vers2);
    printf("  Hash1: %08x\n", hash1);
    time_t when = (time_t) timestamp;
    // ctime() supplies the trailing newline; it may return NULL.
    const char* whenStr = ctime(&when);
    printf("  Timestamp: %08x - %s", timestamp,
        whenStr != NULL ? whenStr : "(unknown)\n");
    printf("  CompLen: %u\n", compLen);
    printf("  DecompLen: %u\n", decompLen);
    printf("  DecompCRC: %08x\n", decompCrc);
    printf("  Thing1: %08x\n", thing1);

    if (decompLen < 1 || decompLen > (uint32_t)MAX_SCRIPT_LEN) {
        fprintf(stderr, "ERROR: invalid decompressed length\n");
        return 1;
    }

    // See if compressed length matches file length.  Compare against the
    // bytes that actually follow the header, which cannot overflow.
    size_t bodyLen = length - headerLen;
    if (compLen > bodyLen) {
        // file truncated?
        fprintf(stderr, "ERROR: file too short (%zu) for compLen=%u\n",
            length, compLen);
        return 1;
    } else if (compLen != bodyLen) {
        fprintf(stderr,
            "WARNING: comp len expected to be %u, file holds %zu\n",
            compLen, bodyLen);
        // keep going
    }

#if 0
    // Try a CRC on all header fields that follow hash1.
    uint32_t headerCheck = crc32x(start + 16, headerLen - 16);
    printf("header CRC: 0x%08x\n", headerCheck);

    // Try to check the compressed data CRC.  Not sure if it's a CRC-32 or
    // what bytes the hash includes, so try a few.
    for (int i = 16; i <= 36; i += 4) {
        printf("computed CRC(+%d %zu)=0x%08x\n", i, length - i,
            crc32x(start + i, length - i));
    }
#endif

    // Decompress to tokenized form.
    uint8_t* decompData = DecompressLzss(data, compLen, decompLen);
    if (decompData == NULL) {
        fprintf(stderr, "ERROR: LZSS decompression failed\n");
        return 1;
    }

    // Check CRC on decompressed data.  A mismatch is reported but is not
    // treated as fatal.
    uint32_t calcCrc = crc32x(decompData, decompLen);
    printf("Calculated decomp CRC=0x%08x (%s)\n", calcCrc,
        calcCrc == decompCrc ? "match" : "MISMATCH");

    // Dump tokenized form, for debugging.
    if (DEBUG) {
        printf("Writing decompressed data to %s\n", DEBUG_TOK_FILENAME);
        FILE* tokfp = fopen(DEBUG_TOK_FILENAME, "wb");
        if (tokfp != NULL) {
            WriteBuffer(tokfp, decompData, decompLen);
            fclose(tokfp);      // only close a stream that was opened
        } else {
            fprintf(stderr, "WARNING: unable to open %s\n",
                DEBUG_TOK_FILENAME);
        }
    }

    // Convert tokenized to text.
    int result = DetokenizeData(decompData, decompLen, outfp);

    delete[] decompData;
    return result;
}

//
// Main entry point.
//
// Expects two arguments: the input .tok file and the output text file.
// Returns 0 on success, 2 on a usage error, and 1 on any other failure.
//
int main(int argc, char** argv) {
    if (argc != 3) {
        fprintf(stderr, "Usage: detok <infile.tok> <outfile.txt>\n");
        return 2;
    }

    FILE* infp = fopen(argv[1], "rb");
    if (infp == NULL) {
        perror("Unable to open input file");
        return 1;
    }

    size_t length = 0;
    uint8_t* data = ReadFile(infp, &length);
    fclose(infp);
    if (data == NULL) {
        return 1;
    }

    FILE* outfp = fopen(argv[2], "w");
    if (outfp == NULL) {
        perror("Unable to open output file");
        delete[] data;
        return 1;
    }

    int result = ProcessScript(data, length, outfp);

    // fclose() flushes buffered output, so a failure here means the text
    // may not have reached the file.
    if (fclose(outfp) != 0) {
        perror("Unable to finish writing output file");
        if (result == 0) {
            result = 1;
        }
    }

    delete[] data;

    if (result == 0) {
        printf("Success\n");
    }
    return result;
}
	/*
	* WebTV tellyscript decompressor and de-tokenizer.
	*
	* Usage: detok input.tok output.txt
	*
	* The output file will be overwritten if it exists.
	*
	* **************************************************************************
	*
	* Tellyscript is a C-like language invented by Andy Rubin for use at WebTV.
	* The language is missing a few C constructs that are difficult to handle
	* with a simple parser, such as "switch". Every multi-character keyword
	* and operator is converted to a single-byte token, which makes it compact
	* and easy to parse.
	*
	* When generating tellyscripts in the WebTV service, the source files are
	* assembled from parts, minified (function and variable names are remapped
	* to 1- or 2-letter names), tokenized, and then compressed with LZSS
	* before being delivered to the device. The tokenization process strips
	* away comments and most whitespace, but leaves EOL markers intact so that
	* error messages can report the correct line number.
	*
	* This tool decompresses the data and reverses the tokenization process.
	* Whitespace is reintroduced to make the code more readable.
	*
	* **************************************************************************
	*
	* v1.0.0 2021/06/19 fadden
	* - Feels right, but there's no way to know for sure.
	* v1.1.0 2021/06/29 fadden
	* - Identified timestamp and decompressed data CRC fields.
	*
	* **************************************************************************
	*
	* This was developed by reverse-engineering the file format, based on some
	* vague 25-year-old memories. I do not have access to the original sources.
	* There was no equivalent tool at WebTV, because the only thing that ever
	* needed to parse these files was the box itself.
	*
	* This is free and unencumbered software released into the public domain.
	* For details, visit https://unlicense.org/.
	*/
	#include <stdlib.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <time.h>

	// Enable additional debugging output and generation of the "debug.tok" file.
	// A nonzero value turns the extra output on.
	const int DEBUG = false;

	// Name of the debug dump file mentioned above.
	const char DEBUG_TOK_FILENAME[] = "debug.tok";

	// Upper bound, in bytes, on the size of a script.
	// Classic tellyscript limited to 64K IIRC; no idea about MSN.
	const int MAX_SCRIPT_LEN = 128 * 1024;
	// Choose between hex and decimal numeric constant formatting: non-negative
	// values below this threshold are printed in decimal, all others in hex.
	const int HEX_THRESHOLD = 512;

	// from http://web.mit.edu/freebsd/head/sys/libkern/crc32.c
	//
	// COPYRIGHT (C) 1986 Gary S. Brown. You may use this program, or
	// code or tables extracted from it, as desired without restriction.
	//
	// 256-entry lookup table for the byte-at-a-time CRC in crc32x(). It is
	// indexed with the low 8 bits of (running CRC XOR next input byte).
	const uint32_t crc32_tab[] = {
	0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f,
	0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988,
	0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2,
	0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7,
	0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9,
	0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172,
	0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c,
	0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59,
	0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423,
	0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924,
	0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106,
	0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433,
	0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d,
	0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e,
	0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950,
	0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65,
	0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7,
	0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0,
	0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa,
	0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f,
	0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81,
	0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a,
	0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84,
	0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1,
	0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb,
	0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc,
	0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e,
	0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b,
	0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55,
	0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236,
	0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28,
	0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d,
	0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f,
	0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38,
	0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242,
	0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777,
	0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69,
	0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2,
	0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc,
	0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9,
	0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693,
	0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94,
	0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d
	};

	//
	// Compute a table-driven CRC over "size" bytes starting at "buf".
	//
	// The register is seeded with all ones. Unlike the usual CRC-32 there is
	// no final inversion: WebTV doesn't appear to post-condition the result,
	// so the raw register value is returned.
	//
	uint32_t crc32x(const uint8_t* buf, size_t size) {
	    uint32_t reg = ~0U;

	    for (size_t idx = 0; idx < size; idx++) {
	        uint32_t slot = (reg ^ buf[idx]) & 0xFF;
	        reg = (reg >> 8) ^ crc32_tab[slot];
	    }
	    return reg;
	}


	//
	// Extract a 32-bit big-endian value, and advance 4 bytes.
	//
	// pData is the caller's read cursor. The four bytes at *pData are combined
	// most-significant first and *pData is moved past them. The caller must
	// ensure at least four bytes are available.
	//
	static uint32_t Next4BE(const uint8_t** pData) {
	    const uint8_t* data = *pData;
	    // Widen before shifting: a uint8_t promotes to signed int, and shifting
	    // a byte >= 0x80 left by 24 would overflow into the sign bit.
	    uint32_t result = ((uint32_t)data[0] << 24) | ((uint32_t)data[1] << 16)
	        | ((uint32_t)data[2] << 8) | (uint32_t)data[3];
	    // Advance the caller's cursor, not the local pointer-to-pointer.
	    *pData = data + 4;
	    return result;
	}

	//
	// Read entire file into memory.
	//
	// fp must be an open, seekable stream. Files shorter than 16 bytes or
	// longer than MAX_SCRIPT_LEN are rejected.
	//
	// Returns buffer with data and length, or NULL on failure. The buffer is
	// allocated with new[]; the caller releases it with delete[].
	//
	uint8_t* ReadFile(FILE* fp, size_t* pLength) {
	    if (fseek(fp, 0, SEEK_END) != 0) {
	        fprintf(stderr, "ERROR: unable to seek to end of input file\n");
	        return NULL;
	    }
	    // ftell() reports failure as -1, which the range check below rejects.
	    long length = ftell(fp);
	    rewind(fp);
	    if (length < 16 || length > MAX_SCRIPT_LEN) {
	        fprintf(stderr, "ERROR: bad input file length %ld\n", length);
	        return NULL;
	    }

	    uint8_t* buf = new uint8_t[length];

	    size_t actual = fread(buf, 1, (size_t)length, fp);
	    if (actual != (size_t)length) {
	        fprintf(stderr, "ERROR: unable to read entire input file\n");
	        delete[] buf;       // don't leak the buffer on a short read
	        return NULL;
	    }

	    *pLength = (size_t)length;
	    return buf;
	}

	//
	// Write a binary blob of "length" bytes to an open stream.
	//
	// A short write is reported on stderr.
	//
	// Returns 0 on success, 1 on failure.
	//
	int WriteBuffer(FILE* fp, void* data, size_t length) {
	    if (fwrite(data, 1, length, fp) == length) {
	        return 0;
	    }

	    fprintf(stderr, "ERROR: unable to write output file\n");
	    return 1;
	}

	//
	// Decompress LZSS.
	//
	// Based on Haruhiko Okumura's classic code.
	//
	// Each flag byte describes the next eight items, least-significant bit
	// first: a 1 bit is a literal byte, a 0 bit is a two-byte back-reference
	// holding a 12-bit distance (stored as distance-1) and a 4-bit length
	// (stored as length-3).
	//
	// inBuf/compLen describe the compressed data; reads never go past
	// inBuf + compLen. decompLen is the exact number of bytes expected.
	//
	// Returns decoded data on success, NULL on failure. The buffer is
	// allocated with new[]; the caller releases it with delete[].
	//
	uint8_t* DecompressLzss(const uint8_t* inBuf, size_t compLen, size_t decompLen) {
	    const int THRESHOLD = 2;
	    const int MAX_MATCH = 16 + THRESHOLD;
	    const uint8_t* inEnd = inBuf + compLen;
	    unsigned int flags = 0;

	    // Over-allocate so we can use a simpler buffer overrun check: the last
	    // match may run up to MAX_MATCH - 1 bytes past decompLen.
	    uint8_t* outStart = new uint8_t[decompLen + MAX_MATCH];
	    size_t outPos = 0;
	    bool truncated = false;

	    while (outPos < decompLen) {
	        if (((flags >>= 1) & 256) == 0) {
	            if (inBuf == inEnd) {
	                truncated = true;
	                break;
	            }
	            flags = (*inBuf++) | 0xff00;    /* uses higher byte cleverly */
	        }                                   /* to count eight */
	        if (flags & 1) {
	            if (inBuf == inEnd) {
	                truncated = true;
	                break;
	            }
	            outStart[outPos++] = *inBuf++;
	        } else {
	            if (inEnd - inBuf < 2) {
	                truncated = true;
	                break;
	            }
	            unsigned int pos = *inBuf++;
	            unsigned int lenBits = *inBuf++;
	            pos |= ((lenBits & 0xf0) << 4);
	            unsigned int last = (lenBits & 0x0f) + THRESHOLD;

	            // The source byte sits pos+1 bytes back, so pos must be
	            // strictly less than the amount of output produced so far.
	            if (pos >= outPos) {
	                fprintf(stderr, "ERROR: bad LZSS pos %u at %zu\n",
	                    pos, outPos);
	                delete[] outStart;
	                return NULL;
	            }
	            // Copy one byte at a time; source and destination may overlap.
	            for (unsigned int k = 0; k <= last; k++) {
	                outStart[outPos] = outStart[outPos - (pos + 1)];
	                outPos++;
	            }
	        }
	    }

	    if (truncated) {
	        fprintf(stderr, "ERROR: compressed data ended early at output %zu\n",
	            outPos);
	        delete[] outStart;
	        return NULL;
	    }

	    // Make sure we stopped where we expected to.
	    if (outPos != decompLen) {
	        fprintf(stderr, "ERROR: bad decomp len (%zu vs %zu)\n",
	            outPos, decompLen);
	        delete[] outStart;
	        return NULL;
	    }

	    return outStart;
	}

	//
	// Output a null-terminated numeric constant.
	//
	// Constants are always positive. Negative values are prefixed with a
	// unary '-' operator. (Negative 32-bit values are still possible, but
	// they seem intended as large positive values, e.g. fixed addresses in
	// the ROM.)
	//
	// pData is the read cursor and len the number of bytes available at it.
	// On return *pData points past the terminator, or past the last available
	// byte if no terminator was found.
	//
	void OutputNumConst(const uint8_t** pData, size_t len, FILE* outfp) {
	    const uint8_t* data = *pData;

	    // 0x30 - 0x39 are '0' - '9'
	    // 0x3a - 0x3f are ':' - '?', which stand in for hex digits A - F

	    // Accumulate unsigned so values with the high bit set don't overflow a
	    // signed int. The length is tested before each byte is fetched so we
	    // never read past the available data.
	    uint32_t val = 0;
	    while (len > 0) {
	        uint8_t ch = *data++;
	        len--;
	        if (ch == 0) {
	            break;
	        }
	        int digit = ch - '0';
	        if (digit < 0 || digit > 15) {
	            fprintf(stderr, "WARNING: malformed digit 0x%02x (%d)\n",
	                ch, digit);
	        }
	        val = (val << 4) + (uint32_t)digit;
	    }

	    // Arbitrary hex vs. decimal threshold.
	    if (val < (uint32_t)HEX_THRESHOLD) {
	        fprintf(outfp, "%d", (int)val);
	        if (false) {
	            // show character value; not all that useful
	            if (val >= 0x20 && val < 0x7f) {
	                fprintf(outfp, "/*%c*/", (int)val);
	            }
	        }
	    } else {
	        fprintf(outfp, "0x%x", val);
	        if (true) {
	            // format 32-bit alphanumeric constant: if all four bytes are
	            // upper-case letters, show them in a trailing comment
	            char cstr[5];
	            bool isConst = true;
	            for (int i = 0; i < 4; i++) {
	                uint8_t chk = (val >> ((3 - i) * 8)) & 0xff;
	                cstr[i] = (char)chk;
	                if (chk < 'A' || chk > 'Z') {
	                    isConst = false;
	                }
	            }
	            cstr[4] = '\0';
	            if (isConst) {
	                fprintf(outfp, "/*%s*/", cstr);
	            }
	        }
	    }

	    *pData = data;
	}

	//
	// Output a null-terminated string constant, wrapped in double quotes.
	//
	// pData: in/out cursor into the token stream; on return it points just
	//        past the terminating NUL, or at the end of the data if no
	//        terminator was found within len bytes.
	// len:   number of readable bytes at *pData.
	// outfp: destination for the quoted string.
	//
	void OutputStrConst(const uint8_t** pData, size_t len, FILE* outfp) {
	    const uint8_t* data = *pData;

	    fputc('"', outfp);
	    // Test the bound before reading so unterminated input never causes a
	    // read past the end of the buffer.
	    while (len > 0) {
	        uint8_t ch = *data++;
	        len--;
	        if (ch == 0) {
	            break;
	        }

	        // TODO: escape '"', some others?
	        if (ch == 0x0d) {
	            // don't output CR as a literal
	            fputs("\\r", outfp);
	        } else {
	            fputc(ch, outfp);
	        }
	    }
	    fputc('"', outfp);

	    *pData = data;
	}

	//
	// Output a null-terminated identifier.
	//
	// pData: in/out cursor into the token stream; on return it points just
	//        past the terminating NUL, or at the end of the data if no
	//        terminator was found within len bytes.
	// len:   number of readable bytes at *pData.
	// outfp: destination for the identifier text.
	//
	void OutputIdentifier(const uint8_t** pData, size_t len, FILE* outfp) {
	    const uint8_t* data = *pData;

	    // Test the bound before reading so unterminated input never causes a
	    // read past the end of the buffer.
	    while (len > 0) {
	        uint8_t ch = *data++;
	        len--;
	        if (ch == 0) {
	            break;
	        }
	        fputc(ch, outfp);
	    }

	    *pData = data;
	}

	//
	// Convert tokenized data stream to C-like form.
	//
	// decompData/decompLen: the decompressed token stream.
	// outfp: destination for the generated text.
	//
	// Returns 0 once the EOF token is reached, or 1 if the data runs out
	// before an EOF token is seen.
	//
	int DetokenizeData(const uint8_t* decompData, size_t decompLen, FILE* outfp) {
	    //
	    // Most things are either output literally or with a trivial mapping,
	    // but a few things are prefixes for additional data:
	    //
	    //  0x43 'C' - null-terminated numeric constant.  Value is in hex, with
	    //             [0-9] for the decimal digits and [:-?] for the hex digits,
	    //             so you can trivially subtract '0' to get the digit.
	    //  0x49 'I' - null-terminated identifier (e.g. variable name).
	    //  0x53 'S' - null-terminated string constant.
	    //
	    // Script ends with 0xff 0xff 0x00?
	    //
	    // Except for the end marker, all bytes in the file are <= 0x7f.
	    //

	    // Tokens (naming slightly inconsistent... it's fine).
	    const uint8_t TOK_EXCLAM        = 0x21;     // '!'
	    const uint8_t TOK_PERCENT       = 0x25;     // '%'
	    const uint8_t TOK_AND_AND       = 0x26;     // '&'
	    const uint8_t TOK_PAREN_OPEN    = 0x28;     // '('
	    const uint8_t TOK_PAREN_CLOSE   = 0x29;     // ')'
	    const uint8_t TOK_ASTERISK      = 0x2a;     // '*'
	    const uint8_t TOK_PLUS          = 0x2b;     // '+'
	    const uint8_t TOK_COMMA         = 0x2c;     // ','
	    const uint8_t TOK_MINUS         = 0x2d;     // '-'
	    const uint8_t TOK_SLASH         = 0x2f;     // '/'
	    const uint8_t TOK_SEMICOLON     = 0x3b;     // ';'
	    const uint8_t TOK_LF_ANGLE      = 0x3c;     // '<'
	    const uint8_t TOK_EQUAL         = 0x3d;     // '='
	    const uint8_t TOK_RT_ANGLE      = 0x3e;     // '>'
	    const uint8_t TOK_ADDR          = 0x40;     // '@'
	    const uint8_t TOK_NUM_CONST     = 0x43;     // 'C'
	    const uint8_t TOK_MINUSMINUS    = 0x44;     // 'D'
	    const uint8_t TOK_EQUAL_EQUAL   = 0x45;     // 'E'
	    const uint8_t TOK_GT_EQUAL      = 0x47;     // 'G'
	    const uint8_t TOK_IDENT         = 0x49;     // 'I'
	    const uint8_t TOK_LT_EQUAL      = 0x4c;     // 'L'
	    const uint8_t TOK_NEQUAL        = 0x4e;     // 'N'
	    const uint8_t TOK_PLUSPLUS      = 0x50;     // 'P'
	    const uint8_t TOK_STR_CONST     = 0x53;     // 'S'
	    const uint8_t TOK_BRACK_OPEN    = 0x5b;     // '['
	    const uint8_t TOK_BRACK_CLOSE   = 0x5d;     // ']'
	    const uint8_t TOK_BREAK         = 0x62;     // 'b'
	    const uint8_t TOK_CHAR          = 0x63;     // 'c'
	    const uint8_t TOK_LONG          = 0x6c;     // 'l'
	    const uint8_t TOK_ELSE          = 0x65;     // 'e'
	    const uint8_t TOK_FOR           = 0x66;     // 'f'
	    const uint8_t TOK_IF            = 0x69;     // 'i'
	    const uint8_t TOK_RETURN        = 0x72;     // 'r'
	    const uint8_t TOK_WHILE         = 0x77;     // 'w'
	    const uint8_t TOK_CURLY_OPEN    = 0x7b;     // '{'
	    const uint8_t TOK_OR_OR         = 0x7c;     // '|'
	    const uint8_t TOK_CURLY_CLOSE   = 0x7d;     // '}'
	    const uint8_t TOK_EOL           = 0x7f;     // DEL

	    const uint8_t TOK_EOF           = 0xff;

	    const uint8_t* decompStart = decompData;
	    const uint8_t* decompEnd = decompStart + decompLen;

	    int indent = 0;
	    for (;;) {
	        // Check the bound before fetching a token.  (A subtraction into a
	        // size_t can never go negative, so the check has to be made on the
	        // pointers.)  The helpers for 'C'/'I'/'S' may leave the cursor at
	        // the end if their data was unterminated.
	        if (decompData >= decompEnd) {
	            fprintf(stderr, "ERROR: ran off end of script\n");
	            return 1;
	        }

	        uint8_t token = *decompData++;
	        if (token == TOK_EOF) {
	            break;
	        }

	        // Bytes available to a token that carries trailing data.
	        size_t remLen = (size_t)(decompEnd - decompData);

	        switch (token) {
	        case TOK_BREAK:
	            fputs("break", outfp);
	            break;
	        case TOK_FOR:
	            fputs("for ", outfp);
	            break;
	        case TOK_CHAR:
	            fputs("char ", outfp);
	            break;
	        case TOK_ELSE:
	            // decompData[-1] is this token, [-2] is the one before it.
	            if (decompData >= decompStart + 2 &&
	                    decompData[-2] == TOK_CURLY_CLOSE) {
	                // "}else" --> "} else "
	                fputs(" else ", outfp);
	            } else {
	                fputs("else ", outfp);
	            }
	            break;
	        case TOK_IF:
	            fputs("if ", outfp);
	            break;
	        case TOK_LONG:
	            fputs("int ", outfp);
	            break;
	        case TOK_RETURN:
	            fputs("return ", outfp);
	            break;
	        case TOK_WHILE:
	            fputs("while ", outfp);
	            break;
	        case TOK_MINUSMINUS:
	            fputs("--", outfp);
	            break;
	        case TOK_PLUSPLUS:
	            fputs("++", outfp);
	            break;
	        case TOK_NEQUAL:
	            fputs(" != ", outfp);
	            break;
	        case TOK_EQUAL_EQUAL:
	            fputs(" == ", outfp);
	            break;
	        case TOK_LT_EQUAL:
	            fputs(" <= ", outfp);
	            break;
	        case TOK_GT_EQUAL:
	            fputs(" >= ", outfp);
	            break;
	        case TOK_OR_OR:
	            fputs(" || ", outfp);
	            break;
	        case TOK_AND_AND:
	            fputs(" && ", outfp);
	            break;
	        case TOK_ADDR:
	            fputs("&", outfp);
	            break;

	        case TOK_NUM_CONST:
	            OutputNumConst(&decompData, remLen, outfp);
	            break;
	        case TOK_STR_CONST:
	            OutputStrConst(&decompData, remLen, outfp);
	            break;
	        case TOK_IDENT:
	            OutputIdentifier(&decompData, remLen, outfp);
	            break;

	        case TOK_EXCLAM:
	        case TOK_PAREN_OPEN:
	        case TOK_PAREN_CLOSE:
	        case TOK_ASTERISK:  // TODO: add spaces when used for mult (but not deref)
	        case TOK_SEMICOLON:
	        case TOK_BRACK_OPEN:
	        case TOK_BRACK_CLOSE:
	            // copy literal
	            fputc(token, outfp);
	            break;

	        case TOK_COMMA:
	            // copy literal, add space
	            fprintf(outfp, "%c ", token);
	            break;

	        case TOK_EQUAL:
	        case TOK_PLUS:
	        case TOK_MINUS:     // TODO: space after looks wrong for unary minus
	        case TOK_SLASH:
	        case TOK_PERCENT:
	        case TOK_LF_ANGLE:
	        case TOK_RT_ANGLE:
	            // copy literal, add spaces before and after
	            fprintf(outfp, " %c ", token);
	            break;

	        case TOK_CURLY_OPEN:
	            indent++;
	            if (decompData >= decompStart + 2 &&
	                    decompData[-2] == TOK_PAREN_CLOSE) {
	                // insert space in things like "if (blah){"
	                fprintf(outfp, " %c", token);
	            } else {
	                fputc(token, outfp);
	            }
	            break;
	        case TOK_CURLY_CLOSE:
	            indent--;
	            fputc(token, outfp);
	            break;

	        case TOK_EOL:
	            fprintf(outfp, "\n");
	            if (DEBUG) {
	                // Map contents to hex dump.
	                fprintf(outfp, "%04lx  ",
	                    (unsigned long)(decompData - decompStart));
	            }
	            // Synthesize indentation (4 spaces per).
	            {
	                int spaces = indent * 4;
	                if (decompData < decompEnd &&
	                        *decompData == TOK_CURLY_CLOSE) {
	                    // Un-indent closing braces at the start of a line.
	                    spaces -= 4;
	                }
	                for (int i = 0; i < spaces; i++) {
	                    fputc(' ', outfp);
	                }
	            }
	            break;

	        default:
	            // Unknown token; make it stand out in the output.
	            fprintf(outfp, "?%02x?", token);
	            break;
	        }
	    }

	    if (indent != 0) {
	        // Check for imbalanced curly braces; shouldn't happen in valid script.
	        fprintf(stderr, "WARNING: indentation level didn't zero out\n");
	    }

	    // Files seem to end in "0xff 0xff 0x00", and we stop at the first 0xff.
	    size_t consumed = (size_t)(decompData - decompStart);
	    if (consumed + 2 < decompLen) {
	        // early EOF?
	        fprintf(stderr, "WARNING: only consumed %zu of %zu bytes\n",
	            consumed, decompLen);
	    }

	    return 0;
	}

	//
	// Convert a script from compressed + tokenized form to text.
	//
	// data/length: the complete input file (header followed by LZSS data).
	// outfp:       destination for the detokenized text.
	//
	// The header fields and the CRC check result are printed to stdout.
	//
	// Returns 0 on success, nonzero on failure.
	//
	int ProcessScript(const uint8_t* data, size_t length, FILE* outfp) {
	    // The header is nine 32-bit fields; make sure they are all present
	    // before parsing any of them.
	    const size_t HEADER_LEN = 9 * 4;
	    if (length < HEADER_LEN) {
	        fprintf(stderr, "ERROR: file too short (%zu) to hold header\n",
	            length);
	        return 1;
	    }

	    const uint8_t* start = data;

	    // The two lengths, the timestamp, and the decompressed-data CRC are
	    // understood (the CRC is verified below).  The meaning of the other
	    // fields is a guess; no computation has been found that reproduces
	    // hash1.
	    uint32_t magic = Next4BE(&data);        // +00 magic number
	    uint32_t vers1 = Next4BE(&data);        // +04 version number??
	    uint32_t vers2 = Next4BE(&data);        // +08 version number??
	    uint32_t hash1 = Next4BE(&data);        // +0c header checksum?
	    uint32_t timestamp = Next4BE(&data);    // +10 32-bit UNIX timestamp
	    uint32_t compLen = Next4BE(&data);      // +14 length of compressed data
	    uint32_t decompLen = Next4BE(&data);    // +18 length after decompression
	    uint32_t decompCrc = Next4BE(&data);    // +1c CRC on decompressed data
	    uint32_t thing1 = Next4BE(&data);       // +20 one-byte flags?  padding?

	    size_t headerLen = (size_t)(data - start);

	    // Dump the header.
	    printf("Input file is %zu bytes\n", length);
	    printf("Header contents (%zu bytes):\n", headerLen);
	    printf("  Magic: %c%c%c%c\n",
	        (char)((magic >> 24) & 0xff),
	        (char)((magic >> 16) & 0xff),
	        (char)((magic >> 8) & 0xff),
	        (char)(magic & 0xff));
	    printf("  Version1: %u\n", vers1);
	    printf("  Version2: %u\n", vers2);
	    printf("  Hash1: %08x\n", hash1);
	    time_t when = (time_t) timestamp;
	    // ctime() supplies the trailing newline; it may return NULL if the
	    // time cannot be represented.
	    const char* whenStr = ctime(&when);
	    printf("  Timestamp: %08x - %s", timestamp,
	        whenStr != NULL ? whenStr : "(invalid)\n");
	    printf("  CompLen: %u\n", compLen);
	    printf("  DecompLen: %u\n", decompLen);
	    printf("  DecompCRC: %08x\n", decompCrc);
	    printf("  Thing1: %08x\n", thing1);

	    if (decompLen < 1 || decompLen > MAX_SCRIPT_LEN) {
	        fprintf(stderr, "ERROR: invalid decompressed length\n");
	        return 1;
	    }

	    // See if compressed length matches file length.  Compare against the
	    // payload size rather than adding to compLen, so the arithmetic
	    // cannot wrap.
	    size_t payloadLen = length - headerLen;
	    if (compLen > payloadLen) {
	        // file truncated?
	        fprintf(stderr, "ERROR: file too short (%zu) for compLen=%u\n",
	            length, compLen);
	        return 1;
	    } else if (compLen != payloadLen) {
	        fprintf(stderr,
	            "WARNING: comp len expected to be %u, file holds %zu\n",
	            compLen, payloadLen);
	        // keep going
	    }

	#if 0
	    // Try a CRC on all header fields that follow hash1.
	    uint32_t headerCheck = crc32x(start + 16, headerLen - 16);
	    printf("header CRC: 0x%08x\n", headerCheck);

	    // Try to check the compressed data CRC.  Not sure if it's a CRC-32 or
	    // what bytes the hash includes, so try a few.
	    for (int i = 16; i <= 36; i += 4) {
	        printf("computed CRC(+%d %ld)=0x%08x\n", i, length - i,
	            crc32x(start + i, length - i));
	    }
	#endif

	    // Decompress to tokenized form.
	    uint8_t* decompData = DecompressLzss(data, compLen, decompLen);
	    if (decompData == NULL) {
	        fprintf(stderr, "ERROR: LZSS decompression failed\n");
	        return 1;
	    }

	    // Check CRC on decompressed data.  A mismatch is reported but is not
	    // treated as fatal.
	    uint32_t calcCrc = crc32x(decompData, decompLen);
	    printf("Calculated decomp CRC=0x%08x (%s)\n", calcCrc,
	        calcCrc == decompCrc ? "match" : "MISMATCH");

	    // Dump tokenized form, for debugging.
	    if (DEBUG) {
	        printf("Writing decompressed data to %s\n", DEBUG_TOK_FILENAME);
	        FILE* tokfp = fopen(DEBUG_TOK_FILENAME, "wb");
	        if (tokfp != NULL) {
	            WriteBuffer(tokfp, decompData, decompLen);
	            // Only close what was actually opened.
	            fclose(tokfp);
	        } else {
	            perror("Unable to open debug output file");
	        }
	    }

	    // Convert tokenized to text.
	    int result = DetokenizeData(decompData, decompLen, outfp);

	    delete[] decompData;
	    return result;
	}

	//
	// Main entry point.
	//
	// Usage: detok <infile.tok> <outfile.txt>
	//
	// Returns 0 on success, 2 on a usage error, and nonzero for any other
	// failure (including failure to finish writing the output file).
	//
	int main(int argc, char** argv) {
	    if (argc != 3) {
	        fprintf(stderr, "Usage: detok <infile.tok> <outfile.txt>\n");
	        return 2;
	    }

	    FILE* infp = fopen(argv[1], "rb");
	    if (infp == NULL) {
	        perror("Unable to open input file");
	        return 1;
	    }

	    // Pull the whole input into memory; the file isn't needed after that.
	    size_t length = 0;
	    uint8_t* data = ReadFile(infp, &length);
	    fclose(infp);
	    if (data == NULL) {
	        return 1;
	    }

	    FILE* outfp = fopen(argv[2], "w");
	    if (outfp == NULL) {
	        perror("Unable to open output file");
	        delete[] data;
	        return 1;
	    }

	    int result = ProcessScript(data, length, outfp);

	    // fclose() flushes buffered output, so a failure here means the
	    // output file is incomplete even if processing succeeded.
	    if (fclose(outfp) != 0) {
	        perror("Unable to finish writing output file");
	        if (result == 0) {
	            result = 1;
	        }
	    }

	    delete[] data;

	    if (result == 0) {
	        printf("Success\n");
	    }
	    return result;
	}