Skip to content

Instantly share code, notes, and snippets.

@brokenprogrammer
Last active April 7, 2022 06:30
Show Gist options
  • Select an option

  • Save brokenprogrammer/1550e1bb2ee4ec10d6d029d7d9415e32 to your computer and use it in GitHub Desktop.

Select an option

Save brokenprogrammer/1550e1bb2ee4ec10d6d029d7d9415e32 to your computer and use it in GitHub Desktop.
Simple LZW 16bit fixed width decoder
//
// This is a implementation of a simple lzw 16bit fixed width decoder
// that I created after listening to Casey Muratori's talk during stream
// LZW WTF. Within the talk Casey explains the idea behind a lzw decoder
// using the generated output as its string table instead of performing
// mallocs, using a linked list for strings followed by chasing
// pointers to output the string or relying on std::string which performs
// both malloc/free under the hood while decoding.
//
// I wanted to try out the approach he mentions in something simple so
// this is a toy application that can read lzw encoded input from a file
// and then output the decoded string into a new file.
//
// Worth to note is that in Casey's talk he mentions different codes
// such as a clear table code or keeping track of a bit count.
// For simplicity this decoder doesn't do any of that instead just
// does fixed 16 bitdecodes and then upon reaching string table
// cap of 4096 it just stops recording strings.
//
// Build with: cl.exe -nologo /Zi /FC lzw_decode.c /link -opt:ref -incremental:no /Debug:full /out:lzw.exe
//
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdbool.h>
#define ERROR_CODE 0xFFFF
uint8_t InitialTable[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60,
61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
71, 72, 73, 74, 75, 76, 77, 78, 79, 80,
81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100,
101, 102, 103, 104, 105, 106, 107, 108, 109, 110,
111, 112, 113, 114, 115, 116, 117, 118, 119, 120,
121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 134, 135, 136, 137, 138, 139, 140,
141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
161, 162, 163, 164, 165, 166, 167, 168, 169, 170,
171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
181, 182, 183, 184, 185, 186, 187, 188, 189, 190,
191, 192, 193, 194, 195, 196, 197, 198, 199, 200,
201, 202, 203, 204, 205, 206, 207, 208, 209, 210,
211, 212, 213, 214, 215, 216, 217, 218, 219, 220,
221, 222, 223, 224, 225, 226, 227, 228, 229, 230,
231, 232, 233, 234, 235, 236, 237, 238, 239, 240,
241, 242, 243, 244, 245, 246, 247, 248, 249, 250,
251, 252, 253, 254, 255, 256
};
typedef struct table_entry_ {
uint8_t *Start;
uint64_t Length;
} table_entry;
static bool
ReadNext16(char *Buffer, uint32_t BufferLength, uint32_t *Offset, uint16_t *Result)
{
*Result = 0;
if (BufferLength <= *Offset)
{
return false;
}
char Byte1 = *(Buffer + (*Offset)++);
*Result = Byte1 & 0xFF;
if (BufferLength <= *Offset)
{
return true;
}
char Byte2 = *(Buffer + (*Offset)++);
*Result = (*Result << 8) | (Byte2 & 0xFF) ;
return true;
}
static void
Decode(uint8_t *InputBuffer, uint32_t BufferLength, uint8_t *OutBuffer)
{
// NOTE(Oskar): Initial character table
table_entry Table[4096] = {0};
uint32_t TablePosition = 256;
// NOTE(Oskar): Build initial table
for (int i = 0; i <= 255; ++i)
{
Table[i].Start = &InitialTable[i];
Table[i].Length = 1;
}
uint32_t InputOffset = 0;
uint32_t OutputOffset = 0;
// TODO(Oskar): Perform initial read and put it directly into output.
uint16_t InitCode = ERROR_CODE;
if (!ReadNext16(InputBuffer, BufferLength, &InputOffset, &InitCode))
{
return;
}
OutBuffer[OutputOffset++] = (uint8_t)InitCode;
uint16_t PreviouCode = InitCode;
uint32_t PreviousOutputOffset = OutputOffset;
do
{
uint16_t Code = ERROR_CODE;
if (!ReadNext16(InputBuffer, BufferLength, &InputOffset, &Code))
{
break;
}
// NOTE(Oskar): Malformed buffer.
if (Code > TablePosition)
{
return;
}
uint32_t Next = TablePosition;
table_entry *PreviousEntry = &Table[PreviouCode];
// NOTE(Oskar): Have we seen this code before
if (Code < TablePosition)
{
// NOTE(Oskar): Store Table[Prev] + First character of Code in string table.
// then output Table[Code]
table_entry *ThisEntry = &Table[Code];
memcpy(OutBuffer + OutputOffset, ThisEntry->Start, ThisEntry->Length);
OutputOffset += ThisEntry->Length;
}
else
{
// NOTE(Oskar): Store Table[Prev] + First character of Prev in string table.
// then out put what you just stored.
// NOTE(Oskar): String within Table Prev
memcpy(OutBuffer + OutputOffset, PreviousEntry->Start, PreviousEntry->Length);
OutputOffset += PreviousEntry->Length;
// NOTE(Oskar): First Character of Prev
memcpy(OutBuffer + OutputOffset, PreviousEntry->Start, 1);
OutputOffset += 1;
}
// NOTE(Oskar): Store new entry in table based on what we just output in the OutputBuffer
// and increment table position.
if (TablePosition < 4096)
{
table_entry *Entry = &Table[Next];
Entry->Start = OutBuffer + PreviousOutputOffset - PreviousEntry->Length;
Entry->Length = PreviousEntry->Length + 1;
TablePosition++;
}
PreviousOutputOffset = OutputOffset;
PreviouCode = Code;
} while(1);
}
int main(int argc, char **argv)
{
FILE *In = stdin;
FILE *Out = stdout;
if (argc > 1) {
In = fopen(argv[1], "rb");
if (In == NULL) {
fprintf(stderr, "Can't open %s.\n", argv[1]);
return 1;
}
}
if (argc > 2) {
Out = fopen(argv[2], "wb");
if (Out == NULL) {
fprintf(stderr, "Can't open %s.\n", argv[2]);
return 1;
}
}
// NOTE(Oskar): Read entire file
fseek(In, 0, SEEK_END);
uint32_t FileSize = ftell(In);
fseek(In, 0, SEEK_SET);
void *FileContent = malloc(FileSize);
fread(FileContent, FileSize, 1, In);
fclose(In);
// NOTE(Oskar): For this sample I needed around 5.3MB.
uint8_t *OutputBuffer = malloc(53*1024*1024);
Decode(FileContent, FileSize, OutputBuffer);
int OutputLength = strlen(OutputBuffer);
fwrite(OutputBuffer, 1, OutputLength, Out);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment