Last active
April 7, 2022 06:30
-
-
Save brokenprogrammer/1550e1bb2ee4ec10d6d029d7d9415e32 to your computer and use it in GitHub Desktop.
Simple LZW 16bit fixed width decoder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // | |
| // This is a implementation of a simple lzw 16bit fixed width decoder | |
| // that I created after listening to Casey Muratori's talk during stream | |
| // LZW WTF. Within the talk Casey explains the idea behind a lzw decoder | |
| // using the generated output as its string table instead of performing | |
| // mallocs, using a linked list for strings followed by chasing | |
| // pointers to output the string or relying on std::string which performs | |
| // both malloc/free under the hood while decoding. | |
| // | |
| // I wanted to try out the approach he mentions in something simple so | |
| // this is a toy application that can read lzw encoded input from a file | |
| // and then output the decoded string into a new file. | |
| // | |
| // Worth to note is that in Casey's talk he mentions different codes | |
| // such as a clear table code or keeping track of a bit count. | |
| // For simplicity this decoder doesn't do any of that instead just | |
| // does fixed 16 bitdecodes and then upon reaching string table | |
| // cap of 4096 it just stops recording strings. | |
| // | |
| // Build with: cl.exe -nologo /Zi /FC lzw_decode.c /link -opt:ref -incremental:no /Debug:full /out:lzw.exe | |
| // | |
| #include <stdio.h> | |
| #include <stdint.h> | |
| #include <stdlib.h> | |
| #include <stdbool.h> | |
| #define ERROR_CODE 0xFFFF | |
| uint8_t InitialTable[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, | |
| 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, | |
| 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, | |
| 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, | |
| 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, | |
| 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, | |
| 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, | |
| 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, | |
| 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, | |
| 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, | |
| 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, | |
| 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, | |
| 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, | |
| 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, | |
| 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, | |
| 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, | |
| 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, | |
| 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, | |
| 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, | |
| 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, | |
| 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, | |
| 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, | |
| 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, | |
| 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, | |
| 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, | |
| 251, 252, 253, 254, 255, 256 | |
| }; | |
| typedef struct table_entry_ { | |
| uint8_t *Start; | |
| uint64_t Length; | |
| } table_entry; | |
| static bool | |
| ReadNext16(char *Buffer, uint32_t BufferLength, uint32_t *Offset, uint16_t *Result) | |
| { | |
| *Result = 0; | |
| if (BufferLength <= *Offset) | |
| { | |
| return false; | |
| } | |
| char Byte1 = *(Buffer + (*Offset)++); | |
| *Result = Byte1 & 0xFF; | |
| if (BufferLength <= *Offset) | |
| { | |
| return true; | |
| } | |
| char Byte2 = *(Buffer + (*Offset)++); | |
| *Result = (*Result << 8) | (Byte2 & 0xFF) ; | |
| return true; | |
| } | |
| static void | |
| Decode(uint8_t *InputBuffer, uint32_t BufferLength, uint8_t *OutBuffer) | |
| { | |
| // NOTE(Oskar): Initial character table | |
| table_entry Table[4096] = {0}; | |
| uint32_t TablePosition = 256; | |
| // NOTE(Oskar): Build initial table | |
| for (int i = 0; i <= 255; ++i) | |
| { | |
| Table[i].Start = &InitialTable[i]; | |
| Table[i].Length = 1; | |
| } | |
| uint32_t InputOffset = 0; | |
| uint32_t OutputOffset = 0; | |
| // TODO(Oskar): Perform initial read and put it directly into output. | |
| uint16_t InitCode = ERROR_CODE; | |
| if (!ReadNext16(InputBuffer, BufferLength, &InputOffset, &InitCode)) | |
| { | |
| return; | |
| } | |
| OutBuffer[OutputOffset++] = (uint8_t)InitCode; | |
| uint16_t PreviouCode = InitCode; | |
| uint32_t PreviousOutputOffset = OutputOffset; | |
| do | |
| { | |
| uint16_t Code = ERROR_CODE; | |
| if (!ReadNext16(InputBuffer, BufferLength, &InputOffset, &Code)) | |
| { | |
| break; | |
| } | |
| // NOTE(Oskar): Malformed buffer. | |
| if (Code > TablePosition) | |
| { | |
| return; | |
| } | |
| uint32_t Next = TablePosition; | |
| table_entry *PreviousEntry = &Table[PreviouCode]; | |
| // NOTE(Oskar): Have we seen this code before | |
| if (Code < TablePosition) | |
| { | |
| // NOTE(Oskar): Store Table[Prev] + First character of Code in string table. | |
| // then output Table[Code] | |
| table_entry *ThisEntry = &Table[Code]; | |
| memcpy(OutBuffer + OutputOffset, ThisEntry->Start, ThisEntry->Length); | |
| OutputOffset += ThisEntry->Length; | |
| } | |
| else | |
| { | |
| // NOTE(Oskar): Store Table[Prev] + First character of Prev in string table. | |
| // then out put what you just stored. | |
| // NOTE(Oskar): String within Table Prev | |
| memcpy(OutBuffer + OutputOffset, PreviousEntry->Start, PreviousEntry->Length); | |
| OutputOffset += PreviousEntry->Length; | |
| // NOTE(Oskar): First Character of Prev | |
| memcpy(OutBuffer + OutputOffset, PreviousEntry->Start, 1); | |
| OutputOffset += 1; | |
| } | |
| // NOTE(Oskar): Store new entry in table based on what we just output in the OutputBuffer | |
| // and increment table position. | |
| if (TablePosition < 4096) | |
| { | |
| table_entry *Entry = &Table[Next]; | |
| Entry->Start = OutBuffer + PreviousOutputOffset - PreviousEntry->Length; | |
| Entry->Length = PreviousEntry->Length + 1; | |
| TablePosition++; | |
| } | |
| PreviousOutputOffset = OutputOffset; | |
| PreviouCode = Code; | |
| } while(1); | |
| } | |
| int main(int argc, char **argv) | |
| { | |
| FILE *In = stdin; | |
| FILE *Out = stdout; | |
| if (argc > 1) { | |
| In = fopen(argv[1], "rb"); | |
| if (In == NULL) { | |
| fprintf(stderr, "Can't open %s.\n", argv[1]); | |
| return 1; | |
| } | |
| } | |
| if (argc > 2) { | |
| Out = fopen(argv[2], "wb"); | |
| if (Out == NULL) { | |
| fprintf(stderr, "Can't open %s.\n", argv[2]); | |
| return 1; | |
| } | |
| } | |
| // NOTE(Oskar): Read entire file | |
| fseek(In, 0, SEEK_END); | |
| uint32_t FileSize = ftell(In); | |
| fseek(In, 0, SEEK_SET); | |
| void *FileContent = malloc(FileSize); | |
| fread(FileContent, FileSize, 1, In); | |
| fclose(In); | |
| // NOTE(Oskar): For this sample I needed around 5.3MB. | |
| uint8_t *OutputBuffer = malloc(53*1024*1024); | |
| Decode(FileContent, FileSize, OutputBuffer); | |
| int OutputLength = strlen(OutputBuffer); | |
| fwrite(OutputBuffer, 1, OutputLength, Out); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment