bnnm/lz4.c

## lz4.c
// Decompresses LZ4 found in XNB (just a test tool for vgmstream).

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>

/* Decompresses LZ4 from MonoGame. The original C lib has a lot of modes and configs, but
 * MonoGame only uses the core 'block' part, which is a fairly simple LZ77 (has one command
 * to copy literal and window values, with variable copy lengths).
 *
 * This is a basic re-implementation (not tuned for performance) for practice/test purposes,
 * that handles streaming decompression as a state machine since we can run out of src or dst
 * bytes anytime and LZ4 allows any copy length, with copy window as a circular buffer. Not
 * sure what's the best/standard way to do it though. Info:
 * - https://github.com/lz4/lz4
 * - https://github.com/MonoGame/MonoGame/blob/develop/MonoGame.Framework/Utilities/Lz4Stream/Lz4DecoderStream.cs
 */

#define LZ4_ERROR           -1 /* internal error */
#define LZ4_OK_SRC_END      -2 /* ran out of input data */
#define LZ4_OK_DST_END      -3 /* ran out of output space */
#define LZ4_WINDOW_SIZE     (1 << 16)
#define LZ4_WINDOW_BYTES    2
#define LZ4_MIN_MATCH_LEN   4
#define LZ4_VARLEN_MARK     15
#define LZ4_VARLEN_CONTINUE 255


typedef enum {
    READ_TOKEN,
    READ_LITERAL,
    COPY_LITERAL,
    READ_OFFSET,
    READ_MATCH,
    SET_MATCH,
    COPY_MATCH
} lz4state_t;

typedef struct {
    lz4state_t state;

    uint8_t token;
    int literal_len;
    int offset_cur;
    int offset_pos;
    int match_len;
    int match_pos;

    int window_pos;
    uint8_t window[LZ4_WINDOW_SIZE];
} lz4context_t;

typedef struct {
    lz4context_t ctx;
    uint8_t* dst;
    const uint8_t* src;
    int dst_size;
    int src_size;
    int dst_done;
    int src_done;
} lz4stream_t;

static void reset_lz4mg(lz4stream_t* strm) {
    memset(strm, 0, sizeof(lz4stream_t));
}

/* Decompress src into dst, returning a code and number of bytes used. Caller must handle
 * stop (when no more input data or all data has been decompressed) as LZ4 has no end marker. */
static int decompress_lz4mg(lz4stream_t* strm) {
    lz4context_t* ctx = &strm->ctx;
    uint8_t* dst = strm->dst;
    const uint8_t* src = strm->src;
    int dst_size = strm->dst_size;
    int src_size = strm->src_size;
    int dst_pos = strm->dst_done;
    int src_pos = strm->src_done;
    uint8_t next_len, next_val;


    while (1) {
        /* mostly linear state machine, but it may break anytime when reaching dst or src
         * end, and resume from same state in next call */
        switch(ctx->state) {

            case READ_TOKEN:
                if (src_pos >= src_size)
                    goto buffer_end;
                ctx->token = src[src_pos++];

                ctx->literal_len = (ctx->token >> 4) & 0xF;
                if (ctx->literal_len == LZ4_VARLEN_MARK)
                    ctx->state = READ_LITERAL;
                else
                    ctx->state = COPY_LITERAL;
                break;

            case READ_LITERAL:
                do {
                    if (src_pos >= src_size)
                        goto buffer_end;
                    next_len = src[src_pos++];
                    ctx->literal_len += next_len;
                } while (next_len == LZ4_VARLEN_CONTINUE);

                ctx->state = COPY_LITERAL;
                break;

            case COPY_LITERAL:
                while (ctx->literal_len > 0) { /* may be 0 */
                    if (src_pos >= src_size || dst_pos >= dst_size)
                        goto buffer_end;
                    next_val = src[src_pos++];

                    dst[dst_pos++] = next_val;

                    ctx->window[ctx->window_pos++] = next_val;
                    if (ctx->window_pos == LZ4_WINDOW_SIZE)
                        ctx->window_pos = 0;

                    ctx->literal_len--;
                };

                /* LZ4 is designed to reach EOF with a literal in this state with some empty values */

                ctx->offset_cur = 0;
                ctx->offset_pos = 0;
                ctx->state = READ_OFFSET;
                break;

            case READ_OFFSET:
                do {
                    if (src_pos >= src_size)
                        goto buffer_end;
                    ctx->offset_pos |= (src[src_pos++] << ctx->offset_cur*8);
                    ctx->offset_cur++;
                } while (ctx->offset_cur < LZ4_WINDOW_BYTES);

                ctx->match_len = (ctx->token & 0xF);
                if (ctx->match_len == LZ4_VARLEN_MARK)
                    ctx->state = READ_MATCH;
                else
                    ctx->state = SET_MATCH;
                break;

            case READ_MATCH:
                do {
                    if (src_pos >= src_size)
                        goto buffer_end;
                    next_len = src[src_pos++];
                    ctx->match_len += next_len;
                    if (src_pos > src_size)
                        break;
                } while (next_len == LZ4_VARLEN_CONTINUE);

                ctx->state = SET_MATCH;
                break;

            case SET_MATCH:
                ctx->match_len += LZ4_MIN_MATCH_LEN;

                ctx->match_pos = ctx->window_pos - ctx->offset_pos;
                if (ctx->match_pos < 0) /* circular buffer so negative is from window end */
                    ctx->match_pos = LZ4_WINDOW_SIZE + ctx->match_pos;

                ctx->state = COPY_MATCH;
                break;

            case COPY_MATCH:
                 while (ctx->match_len > 0) {
                    if (dst_pos >= dst_size)
                        goto buffer_end;

                    next_val = ctx->window[ctx->match_pos++];
                    if (ctx->match_pos == LZ4_WINDOW_SIZE)
                        ctx->match_pos = 0;

                    dst[dst_pos++] = next_val;

                    ctx->window[ctx->window_pos++] = next_val;
                    if (ctx->window_pos == LZ4_WINDOW_SIZE)
                        ctx->window_pos = 0;

                    ctx->match_len--;
                };

                ctx->state = READ_TOKEN;
                break;

            default:
                goto buffer_end;
        }
	}

buffer_end:
    strm->dst_done = dst_pos;
    strm->src_done = src_pos;

    if (src_pos >= src_size)
        return LZ4_OK_SRC_END;
    if (dst_pos >= dst_size)
        return LZ4_OK_DST_END;

    /* shouldn't reach this */
    return LZ4_ERROR;
}

uint32_t read_u32le(uint8_t* buf) {
    return (buf[0] << 0) | (buf[1] << 8u) | (buf[2] << 16u) | (buf[3] << 24u);
}

int main(int argc, char *argv[]) {
    if (argc < 2) {
        printf("LZ4 decoder by bnnm\nUsage: %s infile\n", argv[0]);
        return 1;
    }

    FILE* f_out = NULL;
    FILE* f_in = NULL;
    uint8_t *src = NULL;
    uint8_t *dst = NULL;

    f_in = fopen(argv[1], "rb");
    if (f_in == NULL) {
        printf("cannot open infile %s", argv[1]);
        goto end;
    }

    char s_out[32768];
    strcpy(s_out, argv[1]);
    strcat(s_out, ".dec");
    f_out = fopen(s_out, "wb");
    if (f_out == NULL) {
        printf("cannot open outfile %s", s_out);
        goto end;
    }

    fseek(f_in, 0, SEEK_END);
    int src_size = ftell(f_in);
    fseek(f_in, 0, SEEK_SET);

    src = malloc(src_size);
    if (!src) {
        printf("bad alloc");
        goto end;
    }

    fread(src, 1, src_size, f_in);

    int dst_size = read_u32le(src + 0x0a);
    dst = malloc(dst_size);
    if (!dst) {
        printf("bad alloc");
        return 0;
    }


    lz4stream_t strm;

#if 1
    reset_lz4mg(&strm);
    strm.dst = dst;
    strm.src = src + 0x0e;
    strm.dst_size = dst_size;
    strm.src_size = src_size - 0x0e;

    decompress_lz4mg(&strm);
    fwrite(dst, sizeof(char), strm.dst_done, f_out);

#else
    fseek(f_in, 0x0e, SEEK_SET);

    uint8_t dst2[0x1000];
    uint8_t src2[0x1000];
    int ok, bytes = 0;

    reset_lz4mg(&strm);
    strm.dst = dst2;
    strm.src = src2;
    strm.dst_size = sizeof(dst2);
    strm.src_size = sizeof(src2);

    int src_read = 1;

    while (bytes < dst_size) {

        if (src_read) {
            src_read = 0;
            strm.src_done = 0;
            strm.src_size = fread(src2, sizeof(uint8_t), sizeof(src2), f_in);
            if (strm.src_size == 0) { /* EOF */
                printf("EOF reached\n");
                break;
            }
        }


        ok = decompress_lz4mg(&strm);

        fwrite(strm.dst, sizeof(uint8_t), strm.dst_done, f_out);
        bytes += strm.dst_done;
        strm.dst_done = 0;

        if (ok == LZ4_ERROR) {
            printf("LZ4: error decompressing\n");
            break;
        }
        else if (ok == LZ4_OK_SRC_END) {
            src_read = 1;
        }
        /* no need to handle dst end as we just write whatever it has and reset */
        //if (ok == LZ4_OK_DST_END) { ...  }
    }
#endif

end:
    free(src);
    free(dst);
    fclose(f_in);
    fclose(f_out);
    return 0;
}
	// Decompresses LZ4 found in XNB (just a test tool for vgmstream).

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>
	#include <string.h>

	/* Decompresses LZ4 from MonoGame. The original C lib has a lot of modes and configs, but
	* MonoGame only uses the core 'block' part, which is a fairly simple LZ77 (has one command
	* to copy literal and window values, with variable copy lengths).
	*
	* This is a basic re-implementation (not tuned for performance) for practice/test purposes,
	* that handles streaming decompression as a state machine since we can run out of src or dst
	* bytes anytime and LZ4 allows any copy length, with copy window as a circular buffer. Not
	* sure what's the best/standard way to do it though. Info:
	* - https://github.com/lz4/lz4
	* - https://github.com/MonoGame/MonoGame/blob/develop/MonoGame.Framework/Utilities/Lz4Stream/Lz4DecoderStream.cs
	*/

	#define LZ4_ERROR -1 /* internal error */
	#define LZ4_OK_SRC_END -2 /* ran out of input data */
	#define LZ4_OK_DST_END -3 /* ran out of output space */
	#define LZ4_WINDOW_SIZE (1 << 16)
	#define LZ4_WINDOW_BYTES 2
	#define LZ4_MIN_MATCH_LEN 4
	#define LZ4_VARLEN_MARK 15
	#define LZ4_VARLEN_CONTINUE 255


	typedef enum {
	READ_TOKEN,
	READ_LITERAL,
	COPY_LITERAL,
	READ_OFFSET,
	READ_MATCH,
	SET_MATCH,
	COPY_MATCH
	} lz4state_t;

	typedef struct {
	lz4state_t state;

	uint8_t token;
	int literal_len;
	int offset_cur;
	int offset_pos;
	int match_len;
	int match_pos;

	int window_pos;
	uint8_t window[LZ4_WINDOW_SIZE];
	} lz4context_t;

	typedef struct {
	lz4context_t ctx;
	uint8_t* dst;
	const uint8_t* src;
	int dst_size;
	int src_size;
	int dst_done;
	int src_done;
	} lz4stream_t;

	static void reset_lz4mg(lz4stream_t* strm) {
	memset(strm, 0, sizeof(lz4stream_t));
	}

	/* Decompress src into dst, returning a code and number of bytes used. Caller must handle
	* stop (when no more input data or all data has been decompressed) as LZ4 has no end marker. */
	static int decompress_lz4mg(lz4stream_t* strm) {
	lz4context_t* ctx = &strm->ctx;
	uint8_t* dst = strm->dst;
	const uint8_t* src = strm->src;
	int dst_size = strm->dst_size;
	int src_size = strm->src_size;
	int dst_pos = strm->dst_done;
	int src_pos = strm->src_done;
	uint8_t next_len, next_val;


	while (1) {
	/* mostly linear state machine, but it may break anytime when reaching dst or src
	* end, and resume from same state in next call */
	switch(ctx->state) {

	case READ_TOKEN:
	if (src_pos >= src_size)
	goto buffer_end;
	ctx->token = src[src_pos++];

	ctx->literal_len = (ctx->token >> 4) & 0xF;
	if (ctx->literal_len == LZ4_VARLEN_MARK)
	ctx->state = READ_LITERAL;
	else
	ctx->state = COPY_LITERAL;
	break;

	case READ_LITERAL:
	do {
	if (src_pos >= src_size)
	goto buffer_end;
	next_len = src[src_pos++];
	ctx->literal_len += next_len;
	} while (next_len == LZ4_VARLEN_CONTINUE);

	ctx->state = COPY_LITERAL;
	break;

	case COPY_LITERAL:
	while (ctx->literal_len > 0) { /* may be 0 */
	if (src_pos >= src_size \|\| dst_pos >= dst_size)
	goto buffer_end;
	next_val = src[src_pos++];

	dst[dst_pos++] = next_val;

	ctx->window[ctx->window_pos++] = next_val;
	if (ctx->window_pos == LZ4_WINDOW_SIZE)
	ctx->window_pos = 0;

	ctx->literal_len--;
	};

	/* LZ4 is designed to reach EOF with a literal in this state with some empty values */

	ctx->offset_cur = 0;
	ctx->offset_pos = 0;
	ctx->state = READ_OFFSET;
	break;

	case READ_OFFSET:
	do {
	if (src_pos >= src_size)
	goto buffer_end;
	ctx->offset_pos \|= (src[src_pos++] << ctx->offset_cur*8);
	ctx->offset_cur++;
	} while (ctx->offset_cur < LZ4_WINDOW_BYTES);

	ctx->match_len = (ctx->token & 0xF);
	if (ctx->match_len == LZ4_VARLEN_MARK)
	ctx->state = READ_MATCH;
	else
	ctx->state = SET_MATCH;
	break;

	case READ_MATCH:
	do {
	if (src_pos >= src_size)
	goto buffer_end;
	next_len = src[src_pos++];
	ctx->match_len += next_len;
	if (src_pos > src_size)
	break;
	} while (next_len == LZ4_VARLEN_CONTINUE);

	ctx->state = SET_MATCH;
	break;

	case SET_MATCH:
	ctx->match_len += LZ4_MIN_MATCH_LEN;

	ctx->match_pos = ctx->window_pos - ctx->offset_pos;
	if (ctx->match_pos < 0) /* circular buffer so negative is from window end */
	ctx->match_pos = LZ4_WINDOW_SIZE + ctx->match_pos;

	ctx->state = COPY_MATCH;
	break;

	case COPY_MATCH:
	while (ctx->match_len > 0) {
	if (dst_pos >= dst_size)
	goto buffer_end;

	next_val = ctx->window[ctx->match_pos++];
	if (ctx->match_pos == LZ4_WINDOW_SIZE)
	ctx->match_pos = 0;

	dst[dst_pos++] = next_val;

	ctx->window[ctx->window_pos++] = next_val;
	if (ctx->window_pos == LZ4_WINDOW_SIZE)
	ctx->window_pos = 0;

	ctx->match_len--;
	};

	ctx->state = READ_TOKEN;
	break;

	default:
	goto buffer_end;
	}
	}

	buffer_end:
	strm->dst_done = dst_pos;
	strm->src_done = src_pos;

	if (src_pos >= src_size)
	return LZ4_OK_SRC_END;
	if (dst_pos >= dst_size)
	return LZ4_OK_DST_END;

	/* shouldn't reach this */
	return LZ4_ERROR;
	}

	uint32_t read_u32le(uint8_t* buf) {
	return (buf[0] << 0) \| (buf[1] << 8u) \| (buf[2] << 16u) \| (buf[3] << 24u);
	}

	int main(int argc, char *argv[]) {
	if (argc < 2) {
	printf("LZ4 decoder by bnnm\nUsage: %s infile\n", argv[0]);
	return 1;
	}

	FILE* f_out = NULL;
	FILE* f_in = NULL;
	uint8_t *src = NULL;
	uint8_t *dst = NULL;

	f_in = fopen(argv[1], "rb");
	if (f_in == NULL) {
	printf("cannot open infile %s", argv[1]);
	goto end;
	}

	char s_out[32768];
	strcpy(s_out, argv[1]);
	strcat(s_out, ".dec");
	f_out = fopen(s_out, "wb");
	if (f_out == NULL) {
	printf("cannot open outfile %s", s_out);
	goto end;
	}

	fseek(f_in, 0, SEEK_END);
	int src_size = ftell(f_in);
	fseek(f_in, 0, SEEK_SET);

	src = malloc(src_size);
	if (!src) {
	printf("bad alloc");
	goto end;
	}

	fread(src, 1, src_size, f_in);

	int dst_size = read_u32le(src + 0x0a);
	dst = malloc(dst_size);
	if (!dst) {
	printf("bad alloc");
	return 0;
	}


	lz4stream_t strm;

	#if 1
	reset_lz4mg(&strm);
	strm.dst = dst;
	strm.src = src + 0x0e;
	strm.dst_size = dst_size;
	strm.src_size = src_size - 0x0e;

	decompress_lz4mg(&strm);
	fwrite(dst, sizeof(char), strm.dst_done, f_out);

	#else
	fseek(f_in, 0x0e, SEEK_SET);

	uint8_t dst2[0x1000];
	uint8_t src2[0x1000];
	int ok, bytes = 0;

	reset_lz4mg(&strm);
	strm.dst = dst2;
	strm.src = src2;
	strm.dst_size = sizeof(dst2);
	strm.src_size = sizeof(src2);

	int src_read = 1;

	while (bytes < dst_size) {

	if (src_read) {
	src_read = 0;
	strm.src_done = 0;
	strm.src_size = fread(src2, sizeof(uint8_t), sizeof(src2), f_in);
	if (strm.src_size == 0) { /* EOF */
	printf("EOF reached\n");
	break;
	}
	}


	ok = decompress_lz4mg(&strm);

	fwrite(strm.dst, sizeof(uint8_t), strm.dst_done, f_out);
	bytes += strm.dst_done;
	strm.dst_done = 0;

	if (ok == LZ4_ERROR) {
	printf("LZ4: error decompressing\n");
	break;
	}
	else if (ok == LZ4_OK_SRC_END) {
	src_read = 1;
	}
	/* no need to handle dst end as we just write whatever it has and reset */
	//if (ok == LZ4_OK_DST_END) { ... }
	}
	#endif

	end:
	free(src);
	free(dst);
	fclose(f_in);
	fclose(f_out);
	return 0;
	}