Skip to content

Instantly share code, notes, and snippets.

@vivkin
Last active June 4, 2017 15:31
Show Gist options
  • Save vivkin/e69995fbce7081436be6 to your computer and use it in GitHub Desktop.
Save vivkin/e69995fbce7081436be6 to your computer and use it in GitHub Desktop.
Small LZ77 compressor. Uses variable length integers from xz for coding length\offset
#define LZ_IMPLEMENTATION
#include "lz.h"
#include <stdio.h>
int main(int argc, char **argv) {
uint8_t src[1 << 16];
uint8_t dst[1 << 16];
size_t size = fread(src, 1, sizeof(src), stdin);
if (argc > 1 && argv[1][0] == '-'&& argv[1][1] == 'd')
size = lz_decompress(dst, src, size);
else
size = lz_compress(dst, src, size);
fwrite(dst, 1, size, stdout);
return 0;
}
#ifndef LZ_H
#define LZ_H
#include <stddef.h>
#include <stdint.h>
#if defined(LZ_STATIC)
#define LZDEF static
#elif defined(__cplusplus)
#define LZDEF extern "C"
#else
#define LZDEF extern
#endif
LZDEF size_t lz_compress(void *dst, const void *src, size_t n);
LZDEF size_t lz_decompress(void *dst, const void *src, size_t n);
#endif // LZ_H
#ifdef LZ_IMPLEMENTATION
enum { LZ_HASH_BITS = 14 };
static inline uint32_t lz_read32(const void *src) {
return *(const uint32_t *)src;
}
static inline void lz_write32(void *dst, uint32_t value) {
*(uint32_t *)dst = value;
}
static inline size_t lz_pack32(void *dst, uint32_t value) {
uint8_t *d = (uint8_t *)dst;
while (value >= 0x80) {
*d++ = value | 0x80;
value >>= 7;
}
*d++ = value;
return d - (uint8_t *)dst;
}
static inline size_t lz_unpack32(const void *src, uint32_t *value) {
const uint8_t *s = (const uint8_t *)src;
*value = *s & 0x7F;
while (*s++ & 0x80)
*value |= (uint32_t)(*s & 0x7F) << ((s - (const uint8_t *)src) * 7);
return s - (const uint8_t *)src;
}
static inline void *lz_copy(void *dst, const void *src, size_t n) {
uint8_t *d = (uint8_t *)dst;
const uint8_t *s = (const uint8_t *)src;
for (; n >= 4; n -= 4, d += 4, s += 4)
lz_write32(d, lz_read32(s));
while (n--)
*d++ = *s++;
return d;
}
static inline uint32_t lz_hash(uint32_t x) {
return (x * 2654435761u) >> (32 - LZ_HASH_BITS);
}
size_t lz_compress(void *dst, const void *src, size_t n) {
uint8_t *d = (uint8_t *)dst;
const uint8_t *s = (const uint8_t *)src;
const uint8_t *s_end = s + n;
const uint8_t *anchor = s;
uint32_t hash_table[1 << LZ_HASH_BITS] = {0};
uint32_t next_h = lz_hash(lz_read32(++s));
size_t length;
while (1) {
const uint8_t *ref;
const uint8_t *next_ip = s;
size_t step = 1 << 5;
do {
uint32_t h = next_h;
s = next_ip;
next_ip = s + (++step >> 5);
if (next_ip >= s_end - 4) {
goto remainder;
}
next_h = lz_hash(lz_read32(next_ip));
ref = (const uint8_t *)src + hash_table[h];
hash_table[h] = s - (const uint8_t *)src;
} while (lz_read32(s) != lz_read32(ref));
// write literals
length = s - anchor;
if (length) {
d += lz_pack32(d, length);
d += lz_pack32(d, 0);
d = (uint8_t *)lz_copy(d, anchor, length);
}
// find match length
anchor = s;
do {
s += 4;
ref += 4;
uint32_t x = lz_read32(s) ^ lz_read32(ref);
if (x) {
uint32_t n = __builtin_ctz(x) >> 3;
s += n;
ref += n;
break;
}
} while (s < s_end - 4);
// fill table
hash_table[lz_hash(lz_read32(s - 3))] = s - (const uint8_t *)src - 3;
hash_table[lz_hash(lz_read32(s - 2))] = s - (const uint8_t *)src - 2;
hash_table[lz_hash(lz_read32(s - 1))] = s - (const uint8_t *)src - 1;
d += lz_pack32(d, s - anchor);
d += lz_pack32(d, s - ref);
anchor = s;
next_h = lz_hash(lz_read32(s));
}
remainder:
length = s_end - anchor;
d += lz_pack32(d, length);
d += lz_pack32(d, 0);
d = (uint8_t *)lz_copy(d, anchor, length);
return d - (uint8_t *)dst;
}
size_t lz_decompress(void *dst, const void *src, size_t n) {
uint8_t *d = (uint8_t *)dst;
const uint8_t *s = (const uint8_t *)src;
const uint8_t *s_end = s + n;
uint32_t length;
uint32_t offset;
while (s != s_end) {
s += lz_unpack32(s, &length);
s += lz_unpack32(s, &offset);
if (offset) {
const uint8_t *ref = d - offset;
size_t dist = d - ref;
for (; dist < 4; dist = d - ref) {
lz_write32(d, lz_read32(ref));
d += dist;
length -= dist;
}
d = (uint8_t *)lz_copy(d, ref, length);
} else {
d = (uint8_t *)lz_copy(d, s, length);
s += length;
}
}
return d - (uint8_t *)dst;
}
#endif // LZ_IMPLEMENTATION
CC ?= cc
CFLAGS ?= -Wall -Wextra -std=c11 -O3
BINDIR = bin
OUTNAME = lz
SOURCES = lz.c
OBJECTS = $(SOURCES:%.c=$(BINDIR)/%.o)
export V := false
export CMD_PREFIX := @
ifeq ($(V),true)
CMD_PREFIX :=
endif
all: dirs $(BINDIR)/$(OUTNAME)
@echo "Build finished"
dirs:
@echo "Creating directories"
@mkdir -p $(BINDIR) $(dir $(OBJECTS))
clean:
@echo "Deleting binaries"
$(CMD_PREFIX)rm -f $(OBJECTS) $(BINDIR)/$(OUTNAME)
test: $(BINDIR)/$(OUTNAME)
@echo "Testing"
cat $(SOURCES) | md5
cat $(SOURCES) | $(BINDIR)/$(OUTNAME) | $(BINDIR)/$(OUTNAME) -d | md5
$(BINDIR)/%.o: %.c
@echo "Compiling: $< -> $@"
$(CMD_PREFIX)$(CC) $(CFLAGS) -c $< -o $@
$(BINDIR)/$(OUTNAME): $(OBJECTS)
@echo "Linking: $@"
$(CMD_PREFIX)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJECTS)
.PHONY: all dirs clean test
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment