Created
August 27, 2011 06:27
-
-
Save graphitemaster/1175067 to your computer and use it in GitHub Desktop.
ZLIB compression in 200 LOC
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ~zlib compression in 200 LOC quality allowed (5-8), where 5 is lowest, and 8 is heighest, the lower | |
// the quality the smaller the file. By graphitemaster, licensed unded public domain. | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
/* fast lookup tables for length and distance */ | |
static const unsigned short zlib_lengthc[] = { | |
0x003, 0x004, 0x005, 0x006, 0x007, 0x008, 0x009, 0x00A, 0x00B, 0x00D, | |
0x00F, 0x011, 0x013, 0x017, 0x01B, 0x01F, 0x023, 0x02B, 0x033, 0x03B, | |
0x043, 0x053, 0x063, 0x073, 0x083, 0x0A3, 0x0C3, 0x0E3, 0x102, 0x103 | |
}; | |
static const unsigned char zlib_lengtheb[] = { | |
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x1, 0x1, 0x2, 0x2, | |
0x2, 0x2, 0x3, 0x3, 0x3, 0x3, 0x4, 0x4, 0x4, 0x4, 0x5, 0x5, 0x5, 0x5, | |
0x0, | |
}; | |
static const unsigned short zlib_distc[] = { | |
0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000D, 0x0011, 0x0019, | |
0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00C1, 0x0101, 0x0181, 0x0201, 0x0301, | |
0x0401, 0x0601, 0x0801, 0x0C01, 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, | |
0x8000 | |
}; | |
static const unsigned char zlib_disteb[] = { | |
0x0, 0x0, 0x0, 0x0, 0x1, 0x1, 0x2, 0x2, 0x3, 0x3, 0x4, 0x4, 0x5, 0x5, | |
0x6, 0x6, 0x7, 0x7, 0x8, 0x8, 0x9, 0x9, 0xA, 0xA, 0xB, 0xB, 0xC, 0xC, | |
0xD, 0xD | |
}; | |
/* really fast strechy buffer data storage system for zlib. */ | |
/* first element stores the allocated size of the array, and */ | |
/* the second element stores the amount of elements in use. */ | |
#define zlib_data_add(a,v) ((((a)==0 ||((int*)(a)-2)[1]+(1)>=((int*)(a)-2)[0])? zlib_data_grow((void**)&(a),(1),sizeof(*(a))):0),(a)[((int*)(a)-2)[1]++]=(v)) | |
#define zlib_data_count(a) ((a)? ((int*)(a)-2)[1]:0) | |
#define zlib_data_free(a) ((a)?free(((int*)(a)-2)),0:0) | |
#define zlib_huffa(T) \ | |
((T)<=143?(bitbuf|=zlib_bitrev(0x030+(T)-000,8)<<bitcnt,bitcnt+=8,out=zlib_flush(out,&bitbuf,&bitcnt)) : \ | |
(T)<=255?(bitbuf|=zlib_bitrev(0x190+(T)-144,9)<<bitcnt,bitcnt+=9,out=zlib_flush(out,&bitbuf,&bitcnt)) : \ | |
(T)<=279?(bitbuf|=zlib_bitrev(0x000+(T)-256,7)<<bitcnt,bitcnt+=7,out=zlib_flush(out,&bitbuf,&bitcnt)) : \ | |
(bitbuf|=zlib_bitrev(0x0C0+(T)-280,8)<<bitcnt,bitcnt+=8,out=zlib_flush(out,&bitbuf,&bitcnt))) | |
#define zlib_huffb(T) \ | |
((T)<=143?(bitbuf|=zlib_bitrev(0x030+(T)-000,8)<<bitcnt,bitcnt+=8,out=zlib_flush(out,&bitbuf,&bitcnt)) : \ | |
(bitbuf|=zlib_bitrev(0x190+(T)-144,9)<<bitcnt,bitcnt+=9,out=zlib_flush(out,&bitbuf,&bitcnt))) | |
static void *zlib_data_grow(void **a, int in, int it) { | |
int m = *a ? 2 * ((int*)(*a)-2)[0]+in : in+1; | |
void *p = realloc(*a ? ((int*)(*a)-2) : 0, it * m + sizeof(int)*2); | |
if (p) { | |
if (!*a) ((int*)p)[1] = 0; | |
*a = (void*)((int*)p+2); | |
((int*)(*a)-2)[0] = m; | |
} | |
return *a; | |
} | |
static unsigned char *zlib_flush(unsigned char *data, unsigned int *bitbuf, int *bitcnt) { | |
while (*bitcnt >= 8) { | |
zlib_data_add(data, (unsigned char)*bitbuf); | |
*bitbuf >>= 8; | |
*bitcnt -= 8; | |
} | |
return data; | |
} | |
static int zlib_bitrev(int c, int cb) { | |
int r = 0; | |
while (cb--) | |
r = (r << 1) | (c & 1); c >>= 1; | |
return r; | |
} | |
static unsigned int zlib_hash(unsigned char *data) { | |
unsigned int hash = data[0] + (data[1] << 8) + (data[2] << 16); | |
hash ^= hash << 3; hash += hash >> 5; | |
hash ^= hash << 4; hash += hash >> 17; | |
hash ^= hash << 25; hash += hash >> 6; | |
return hash; | |
} | |
static unsigned int zlib_count(unsigned char *a, unsigned char *b, int l) { | |
int i; | |
for (i = 0; i < l && i < 258; ++i) | |
if (a[i] != b[i]) break; | |
return i; | |
} | |
// compress | |
unsigned char *zlib_compress(unsigned char *data, int len, int *olen, int quality) { | |
unsigned char *out = NULL; | |
unsigned char **hash[16384]; // 64KB stack | |
/* bit data */ | |
int bitcnt = 0, i, j; | |
unsigned int bitbuf = 0; | |
/* clamp quality */ | |
if (quality < 5) | |
quality = 5; | |
zlib_data_add(out, 0x78); /* DEFLATE */ | |
zlib_data_add(out, 0x5E); /* FLEVEL */ | |
bitbuf |= 1 << bitcnt, bitcnt += 1, out = zlib_flush(out, &bitbuf, &bitcnt); /* BFINAL */ | |
bitbuf |= 1 << bitcnt, bitcnt += 2, out = zlib_flush(out, &bitbuf, &bitcnt); /* BTYPE */ | |
/* ensure NULL */ | |
for (i = 0; i < 16384; ++i) | |
hash[i] = NULL; | |
i = 0; | |
/* begin compression */ | |
while (i < len - 3) { | |
int hashi = zlib_hash(data + i)&(16383); | |
int besti = 3; | |
unsigned char *bestl = 0; | |
unsigned char **hlist = hash[hashi]; | |
int nlist = zlib_data_count(hlist); | |
for (j = 0; j < nlist; ++j) { | |
/* entry is within a window */ | |
if (hlist[j]-data > i-32768) { | |
int get = zlib_count(hlist[j], data+i, len-i); | |
if (get >= besti) { | |
besti = get; | |
bestl = hlist[j]; | |
} | |
} | |
} | |
/* hash entry too long? delete half entries */ | |
if (hash[hashi] && ((int*)(hash[hashi])-2)[1] == 2*quality) { | |
memcpy(hash[hashi], hash[hashi]+quality, sizeof(hash[hashi][0])*quality); | |
((int*)(hash[hashi])-2)[1] = quality; | |
} | |
zlib_data_add(hash[hashi], data+i); | |
/* check match at next byte. */ | |
if (bestl) { | |
hashi = zlib_hash(data + i + 1)&(16383); | |
hlist = hash[hashi]; | |
nlist = zlib_data_count(hlist); | |
for (j = 0; j < nlist; ++j) { | |
if(hlist[j] - data > i - 32767) { | |
int get = zlib_count(hlist[j], data + i + 1, len - i - 1); | |
if (get > besti) { | |
bestl = NULL; | |
break; | |
} | |
} | |
} | |
} | |
/* huffman */ | |
if (bestl) { | |
int get = data + i - bestl; /* distance */ | |
for (j = 0; besti > zlib_lengthc[j+1]-1; ++j); | |
zlib_huffa(j+257); | |
for (j = 0; get > zlib_distc[j+1]-1; ++j); | |
bitbuf |= zlib_bitrev(j,5) << bitcnt; | |
bitcnt += 5; | |
out = zlib_flush(out, &bitbuf, &bitcnt); | |
if (zlib_lengtheb[j]) { | |
bitbuf |= besti - zlib_lengthc[j] << bitcnt; | |
bitcnt += zlib_lengtheb[j]; | |
out = zlib_flush(out, &bitbuf, &bitcnt); | |
} | |
for (j = 0; get > zlib_distc[j+1]-1; ++j); | |
if (zlib_disteb[j]) { | |
bitbuf |= get - zlib_distc[j] << bitcnt; | |
bitcnt += zlib_disteb[j]; | |
out = zlib_flush(out, &bitbuf, &bitcnt); | |
} | |
i += besti; | |
} else { | |
zlib_huffb(data[i]); | |
++i; | |
} | |
} | |
/* write final bytes */ | |
for (; i < len; ++i) | |
zlib_huffb(data[i]); | |
zlib_huffa(256); /* end of block */ | |
/* padd with 0 bits to boundry */ | |
while (bitcnt) { | |
bitbuf |= 0 << bitcnt; | |
bitcnt += 1; | |
out = zlib_flush(out,&bitbuf,&bitcnt); | |
} | |
/* free hash table */ | |
for (i = 0; i < 16384; ++i) | |
zlib_data_free(hash[i]); | |
/* compute ADLER32 */ | |
unsigned int tab[6] = { 0, 1, 0, len % 5552, 0, 5552}; | |
while (tab[4] < len) { | |
for (tab[0] = 0; tab [0] < tab[3]; tab[0]++) { | |
tab[1] += data[j+i]; | |
tab[2] += tab [1+0]; | |
} | |
tab[1] %= 65521; | |
tab[2] %= 65521; | |
tab[4] += tab[3]; | |
tab[3] = tab[5]; | |
} | |
zlib_data_add(out, (unsigned char)(tab[2]>>8)); | |
zlib_data_add(out, (unsigned char)(tab[2]>>0)); | |
zlib_data_add(out, (unsigned char)(tab[1]>>8)); | |
zlib_data_add(out, (unsigned char)(tab[1]>>0)); | |
*olen = ((int*)out-2)[1]; | |
/* make freeable */ | |
memmove(((int*)out-2), out, *olen); | |
return (unsigned char*)((int *)out-2); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment