Skip to content

Instantly share code, notes, and snippets.

@benanil
Last active December 21, 2023 21:52
Show Gist options
  • Save benanil/4dad093532bf1b254f1ebf752afb9eb8 to your computer and use it in GitHub Desktop.
Save benanil/4dad093532bf1b254f1ebf752afb9eb8 to your computer and use it in GitHub Desktop.
LZ77 compressor and decompressor
#ifdef _MSC_VER
#include <intrin.h> // __movsb
#endif
#include <memory.h>
#include <stdint.h>
#if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
# define AX_LIKELY(x) __builtin_expect(x, 1)
# define AX_UNLIKELY(x) __builtin_expect(x, 0)
#else
# define AX_LIKELY(x) (x)
# define AX_UNLIKELY(x) (x)
#endif
// using macro if less than 17 because we want this to be constexpr
#ifndef MIN
# define MIN(a, b) ((a) < (b) ? (a) : (b))
# define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
inline void SmallMemCpy(void* dst, const void* src, uint64_t size)
{
#ifdef _MSC_VER
__movsb((unsigned char*)dst, (unsigned char*)src, size);
#else
__builtin_memcpy(dst, src, size);
#endif
}
#define LZ77Allocate(count) malloc(count)
#define LZ77Deallocate(ptr) free(ptr)
typedef struct LZ77Token_
{
short offset;
unsigned char len;
char c;
} LZ77Token;
typedef struct LZ77Stack_
{
int size;
int capacity;
LZ77Token* ptr;
} LZ77Stack;
const int LZ77WindowMaxSize = 4096;
const int LZ77LookAheadBufferSize = 255;
const int LZ77MinStackSize = 16;
LZ77Token* LZ77Reallocate(LZ77Token* ptr, int oldCount, int count)
{
LZ77Token* old = ptr;
LZ77Token* nev = (LZ77Token*)LZ77Allocate(count * sizeof(LZ77Token));
for (int i = 0; i < MIN(oldCount, count); i++)
{
nev[i] = old[i];
}
LZ77Deallocate(old);
return nev;
}
inline int LZ77CalculateArrayGrowth(int _size)
{
const int addition = _size;
if (AX_UNLIKELY(_size > (INT32_MAX - addition))) {
return INT32_MAX; // growth would overflow
}
return _size + addition; // growth is sufficient
}
void LZ77StackPush(LZ77Stack* stack, LZ77Token token)
{
if (stack->size + 1 >= stack->capacity)
{
int newSize = LZ77CalculateArrayGrowth(stack->capacity);
stack->ptr = LZ77Reallocate(stack->ptr, stack->capacity, newSize);
stack->capacity = newSize;
}
stack->ptr[stack->size++] = token;
}
// usage:
// LZ77Token* tokens; // -> should be nullptr, function returns compressed result to this buffer
// int numTokens = LZ77Compress(input, sizeof(input), &tokens);
// after compression: compressedSize = numTokens * sizeof(LZ77Token)
int LZ77Compress(const void* input, int size, LZ77Token** out)
{
int inputPos = 0;
int windowSize = 0;
LZ77Stack tokens;
tokens.ptr = (LZ77Token*)LZ77Allocate(LZ77MinStackSize * sizeof(LZ77Token));
tokens.capacity = LZ77MinStackSize;
tokens.size = 0;
const char* in = (const char*)input;
while (inputPos < size)
{
int lookAheadSize = MIN(LZ77LookAheadBufferSize, size - inputPos);
int maxLen = 0;
int maxPos = -1;
int searchSize = MIN(windowSize, lookAheadSize);
for (int i = 0; i < searchSize - maxLen; i++)
{
int j = 0;
int w = MAX(inputPos - windowSize, 0);
while (in[w + i + j] == in[inputPos + j] && w+i+j < inputPos && j < LZ77LookAheadBufferSize)
j++;
if (j > maxLen)
{
maxLen = j;
maxPos = i;
}
}
LZ77Token token;
if (maxPos != -1)
{
token.len = maxLen;
token.offset = windowSize - maxPos;
token.c = in[inputPos + maxLen];
windowSize += maxLen;
inputPos += maxLen + 1;
}
else
{
token.len = 0;
token.offset = 0;
token.c = in[inputPos];
inputPos += 1;
windowSize++;
}
windowSize = MIN(windowSize, LZ77WindowMaxSize);
LZ77StackPush(&tokens, token);
}
int numTokens = tokens.size;
*out = tokens.ptr;
return numTokens;
}
// usage:
// int* decompressed = (int*)LZ77Decompress(tokens, numTokens);
char* LZ77Decompress(LZ77Token* tokens, int numTokens)
{
int totalSize = 0;
for (int i = 0; i < numTokens; i++)
totalSize += tokens[i].len + 1;
char* result = (char*)LZ77Allocate(totalSize);
char* curr = result;
for (int i = 0; i < numTokens; i++)
{
if (tokens[i].len)
{
SmallMemCpy(curr, curr - tokens[i].offset, tokens[i].len);
curr += tokens[i].len;
}
*curr++ = tokens[i].c;
}
return result;
}
#ifdef 0 // test
#include <stdio.h>
#include <assert.h>
int main()
{
int input[1024]={0};
LZ77Token* tokens;
int numTokens = LZ77Compress(input, sizeof(input), &tokens);
int* decompressed = (int*)LZ77Decompress(tokens, numTokens);
for (int i = 0; i < sizeof(input) / sizeof(int); i++)
{
assert(input[i] == decompressed[i]);
}
printf("input size: %lu, compressed Size: %lu", sizeof(input), sizeof(LZ77Token) * numTokens);
LZ77Deallocate(decompressed);
LZ77Deallocate(tokens);
return 0;
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment