Last active
December 21, 2023 21:52
-
-
Save benanil/4dad093532bf1b254f1ebf752afb9eb8 to your computer and use it in GitHub Desktop.
LZ77 compressor and decompressor
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifdef _MSC_VER | |
#include <intrin.h> // __movsb | |
#endif | |
#include <memory.h> | |
#include <stdint.h> | |
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__) | |
# define AX_LIKELY(x) __builtin_expect(x, 1) | |
# define AX_UNLIKELY(x) __builtin_expect(x, 0) | |
#else | |
# define AX_LIKELY(x) (x) | |
# define AX_UNLIKELY(x) (x) | |
#endif | |
// using macro if less than 17 because we want this to be constexpr | |
#ifndef MIN | |
# define MIN(a, b) ((a) < (b) ? (a) : (b)) | |
# define MAX(a, b) ((a) > (b) ? (a) : (b)) | |
#endif | |
inline void SmallMemCpy(void* dst, const void* src, uint64_t size) | |
{ | |
#ifdef _MSC_VER | |
__movsb((unsigned char*)dst, (unsigned char*)src, size); | |
#else | |
__builtin_memcpy(dst, src, size); | |
#endif | |
} | |
#define LZ77Allocate(count) malloc(count) | |
#define LZ77Deallocate(ptr) free(ptr) | |
typedef struct LZ77Token_ | |
{ | |
short offset; | |
unsigned char len; | |
char c; | |
} LZ77Token; | |
typedef struct LZ77Stack_ | |
{ | |
int size; | |
int capacity; | |
LZ77Token* ptr; | |
} LZ77Stack; | |
const int LZ77WindowMaxSize = 4096; | |
const int LZ77LookAheadBufferSize = 255; | |
const int LZ77MinStackSize = 16; | |
LZ77Token* LZ77Reallocate(LZ77Token* ptr, int oldCount, int count) | |
{ | |
LZ77Token* old = ptr; | |
LZ77Token* nev = (LZ77Token*)LZ77Allocate(count * sizeof(LZ77Token)); | |
for (int i = 0; i < MIN(oldCount, count); i++) | |
{ | |
nev[i] = old[i]; | |
} | |
LZ77Deallocate(old); | |
return nev; | |
} | |
inline int LZ77CalculateArrayGrowth(int _size) | |
{ | |
const int addition = _size; | |
if (AX_UNLIKELY(_size > (INT32_MAX - addition))) { | |
return INT32_MAX; // growth would overflow | |
} | |
return _size + addition; // growth is sufficient | |
} | |
void LZ77StackPush(LZ77Stack* stack, LZ77Token token) | |
{ | |
if (stack->size + 1 >= stack->capacity) | |
{ | |
int newSize = LZ77CalculateArrayGrowth(stack->capacity); | |
stack->ptr = LZ77Reallocate(stack->ptr, stack->capacity, newSize); | |
stack->capacity = newSize; | |
} | |
stack->ptr[stack->size++] = token; | |
} | |
// usage: | |
// LZ77Token* tokens; // -> should be nullptr, function returns compressed result to this buffer | |
// int numTokens = LZ77Compress(input, sizeof(input), &tokens); | |
// after compression: compressedSize = numTokens * sizeof(LZ77Token) | |
int LZ77Compress(const void* input, int size, LZ77Token** out) | |
{ | |
int inputPos = 0; | |
int windowSize = 0; | |
LZ77Stack tokens; | |
tokens.ptr = (LZ77Token*)LZ77Allocate(LZ77MinStackSize * sizeof(LZ77Token)); | |
tokens.capacity = LZ77MinStackSize; | |
tokens.size = 0; | |
const char* in = (const char*)input; | |
while (inputPos < size) | |
{ | |
int lookAheadSize = MIN(LZ77LookAheadBufferSize, size - inputPos); | |
int maxLen = 0; | |
int maxPos = -1; | |
int searchSize = MIN(windowSize, lookAheadSize); | |
for (int i = 0; i < searchSize - maxLen; i++) | |
{ | |
int j = 0; | |
int w = MAX(inputPos - windowSize, 0); | |
while (in[w + i + j] == in[inputPos + j] && w+i+j < inputPos && j < LZ77LookAheadBufferSize) | |
j++; | |
if (j > maxLen) | |
{ | |
maxLen = j; | |
maxPos = i; | |
} | |
} | |
LZ77Token token; | |
if (maxPos != -1) | |
{ | |
token.len = maxLen; | |
token.offset = windowSize - maxPos; | |
token.c = in[inputPos + maxLen]; | |
windowSize += maxLen; | |
inputPos += maxLen + 1; | |
} | |
else | |
{ | |
token.len = 0; | |
token.offset = 0; | |
token.c = in[inputPos]; | |
inputPos += 1; | |
windowSize++; | |
} | |
windowSize = MIN(windowSize, LZ77WindowMaxSize); | |
LZ77StackPush(&tokens, token); | |
} | |
int numTokens = tokens.size; | |
*out = tokens.ptr; | |
return numTokens; | |
} | |
// usage: | |
// int* decompressed = (int*)LZ77Decompress(tokens, numTokens); | |
char* LZ77Decompress(LZ77Token* tokens, int numTokens) | |
{ | |
int totalSize = 0; | |
for (int i = 0; i < numTokens; i++) | |
totalSize += tokens[i].len + 1; | |
char* result = (char*)LZ77Allocate(totalSize); | |
char* curr = result; | |
for (int i = 0; i < numTokens; i++) | |
{ | |
if (tokens[i].len) | |
{ | |
SmallMemCpy(curr, curr - tokens[i].offset, tokens[i].len); | |
curr += tokens[i].len; | |
} | |
*curr++ = tokens[i].c; | |
} | |
return result; | |
} | |
#ifdef 0 // test | |
#include <stdio.h> | |
#include <assert.h> | |
int main() | |
{ | |
int input[1024]={0}; | |
LZ77Token* tokens; | |
int numTokens = LZ77Compress(input, sizeof(input), &tokens); | |
int* decompressed = (int*)LZ77Decompress(tokens, numTokens); | |
for (int i = 0; i < sizeof(input) / sizeof(int); i++) | |
{ | |
assert(input[i] == decompressed[i]); | |
} | |
printf("input size: %lu, compressed Size: %lu", sizeof(input), sizeof(LZ77Token) * numTokens); | |
LZ77Deallocate(decompressed); | |
LZ77Deallocate(tokens); | |
return 0; | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment