Skip to content

Instantly share code, notes, and snippets.

@guojh
Created December 3, 2012 05:21
Show Gist options
  • Save guojh/4192915 to your computer and use it in GitHub Desktop.
Save guojh/4192915 to your computer and use it in GitHub Desktop.
huffman encode/decode program
#define __BSD_SOURCE
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <endian.h>
#define CHAR_NUM 256
#define MAX_CODELEN 32
#define BITBUF_SIZE 4096
#define MAGIC_STRING "#GUO'S_COMPRESS#"
#define MAGIC_SIZE 16
#define VERSION 1
#define VERSION_LE le32toh(VERSION)
typedef struct bitbuf {
int fd;
uint8_t buf[BITBUF_SIZE];
int bittail, bithead;
uint64_t io_count;
} *bitbuf_t;
static void bitbuf_open(bitbuf_t bitbuf, int fd);
static int bitbuf_len(bitbuf_t bitbuf);
static int bitbuf_left(bitbuf_t bitbuf);
static int bitbuf_readone(bitbuf_t bitbuf);
static void bitbuf_wsync(bitbuf_t bitbut);
static void bitbuf_write(bitbuf_t bitbuf, uint32_t bits, int len);
typedef struct weight_table {
uint8_t weight[CHAR_NUM];
} *weight_table_t;
static void gen_weight_table(weight_table_t wt, int fd);
typedef struct hcode {
uint32_t code;
int codelen;
} *hcode_t;
typedef struct htree_node *htree_node_t;
struct htree_node {
htree_node_t left, right, parent;
int weight;
int code;
int height;
struct hcode hcode;
};
typedef struct htree {
htree_node_t root, nodes[CHAR_NUM];
} *htree_t;
static void gen_htree(htree_t htree, weight_table_t wt);
static htree_node_t htree_encode_one(htree_t htree, char c);
static htree_node_t htree_decode_one(htree_t htree, bitbuf_t bitbuf);
static uint64_t htree_encode(htree_t htree, int fdin, int fdout);
static void htree_decode(htree_t htree, int fdin, int fdout, uint64_t bitcount);
struct hfile {
char magic[MAGIC_SIZE];
uint32_t version;
uint32_t _pad1;
uint64_t bitcount;
struct weight_table weight_table;
char data[0];
};
static void encode(int fdin, int fdout);
static void decode(int fdin, int fdout);
static void bitbuf_open(bitbuf_t bb, int fd) {
bb->fd = fd;
memset(bb->buf, 0, sizeof(bb->buf));
bb->bittail = bb->bithead = 0;
bb->io_count = 0;
}
static int bitbuf_len(bitbuf_t bb) {
return bb->bittail - bb->bithead;
}
static int bitbuf_left(bitbuf_t bb) {
return BITBUF_SIZE*8 - bitbuf_len(bb);
}
static int bitbuf_readone(bitbuf_t bb) {
if(bb->bittail == bb->bithead) {
int ret;
bb->bithead = 0;
ret = read(bb->fd, bb->buf, BITBUF_SIZE);
if(ret < 0) {
perror("butbuf_readone: read");
exit(-1);
}
if(ret == 0)
return -1;
bb->bittail = ret*8;
}
int pos, blen, ret;
pos = bb->bithead / 8;
blen = bb->bithead % 8;
ret = (bb->buf[pos]>>blen)&1;
bb->bithead++;
return ret;
}
static void bitbuf_wsync(bitbuf_t bb) {
int bytes = bb->bittail/8;
int ret;
char *p;
p = bb->buf;
bb->io_count += bytes;
while(bytes) {
ret = write(bb->fd, p, bytes);
if(ret<0) {
perror("bitbuf_wsync: write");
exit(-1);
}
p += ret;
bytes -= ret;
}
bb->bithead = 0;
if(bb->bittail % 8)
bb->buf[0] = bb->buf[bb->bittail/8];
else
bb->buf[0] = 0;
bb->bittail %= 8;
memset(bb->buf+1, 0, sizeof(bb->buf)-1);
}
static void bitbuf_write(bitbuf_t bb, uint32_t bits, int len) {
int pos, blen, bleft, diff;
if(bitbuf_left(bb) < len)
bitbuf_wsync(bb);
pos = bb->bittail / 8;
blen = bb->bittail % 8;
bleft = 8 - blen;
if(bleft) {
bb->buf[pos] |= bits<<blen;
bits >>= bleft;
if(len<=bleft) {
bb->bittail += len;
return;
} else {
bb->bittail += bleft;
}
}
len -= bleft;
while(len) {
diff = len>8 ? 8 : len;
bb->buf[++pos] = bits;
bits >>= diff;
bb->bittail += diff;
len -= diff;
}
}
static void gen_weight_table(weight_table_t wt, int fd) {
char buf[4096];
uint32_t lw[CHAR_NUM], maxw;
double devide;
int ret;
for(int i=0; i<CHAR_NUM; i++)
lw[i] = 0;
while(1) {
ret = read(fd, buf, sizeof(buf));
if(ret<0) {
perror("gen_weight_table: read");
exit(-1);
}
if(ret == 0)
break;
for(int i=0; i<ret; i++)
lw[(uint8_t)buf[i]]++;
}
maxw = 0;
for(int i=0; i<CHAR_NUM; i++)
if(lw[i] > maxw)
maxw = lw[i];
devide = 1;
if(maxw>250)
devide = maxw/250;
for(int i=0; i<CHAR_NUM; i++) {
wt->weight[i] = lw[i]/devide;
if(lw[i] && !wt->weight[i])
wt->weight[i] = 1;
}
}
static void hnode_shift(htree_node_t hnode, int isright) {
if(hnode->left) {
hnode_shift(hnode->left, isright);
hnode_shift(hnode->right, isright);
} else {
hnode->hcode.code <<= 1;
hnode->hcode.code |= isright;
hnode->hcode.codelen++;
}
}
static void gen_htree(htree_t htree, weight_table_t wt) {
htree_node_t nodes[CHAR_NUM], node;
int len;
int maxcodelen;
len = 0;
for(int i=0; i<CHAR_NUM; i++) {
if(wt->weight[i]) {
node = malloc(sizeof(struct htree_node));
node->left = NULL;
node->right = NULL;
node->parent = NULL;
node->weight = wt->weight[i];
node->code = i;
node->hcode.code = 0;
node->hcode.codelen = 0;
node->height = 0;
nodes[len++] = node;
htree->nodes[i] = node;
} else
htree->nodes[i] = NULL;
}
while(len > 1) {
int tmp;
tmp = len-1;
maxcodelen = MAX_CODELEN-1;
while(tmp>>=1)
maxcodelen--;
for(int i=0; i<len; i++)
for(int j=i+1; j<len; j++)
if(nodes[j]->height > maxcodelen ||
nodes[i]->weight < nodes[j]->weight ||
nodes[i]->weight == nodes[j]->weight && // make the result unique
nodes[i]->code > nodes[j]->code) {
htree_node_t tmp;
tmp = nodes[i];
nodes[i] = nodes[j];
nodes[j] = tmp;
}
htree_node_t tl, tr, tn;
tl = nodes[len-2];
tr = nodes[len-1];
hnode_shift(tl, 0);
hnode_shift(tr, 1);
tn = malloc(sizeof(struct htree_node));
tn->left = tl;
tn->right = tr;
tl->parent = tn;
tr->parent = tn;
tn->weight = tl->weight + tr->weight;
tn->code = tl->code < tr->code ? tl->code : tr->code;
tn->height = 1 + ( tl->height > tr->height ? tl->height : tr->height );
len--;
nodes[len-1] = tn;
}
htree->root = nodes[0];
}
static void print_codes(htree_t htree) {
int i, j;
htree_node_t node;
int s=0, w=0;
double comp;
putchar('\n');
for(i=0; i<32; i++) {
for(j=0; j<8; j++) {
node = htree->nodes[i*8+j];
if(node) {
printf(" [%3d:%2d] ", node->weight, node->hcode.codelen);
w += node->weight;
s += node->weight * node->hcode.codelen;
}
else
printf(" NULL ");
}
putchar('\n');
}
comp = (double)s/w;
printf("average code-len: %6.2lf\n", comp);
printf("compressed: %6.2lf%%\n", (1-comp/8)*100);
}
static htree_node_t htree_encode_one(htree_t htree, char c) {
return htree->nodes[(uint8_t)c];
}
static htree_node_t htree_decode_one(htree_t htree, bitbuf_t bitbuf) {
int bit;
htree_node_t node;
node = htree->root;
while(1) {
if(node->left) {
bit = bitbuf_readone(bitbuf);
if(bit<0)
return NULL;
if(!bit)
node = node->left;
else
node = node->right;
} else
return node;
}
}
static uint64_t htree_encode(htree_t htree, int fdin, int fdout) {
struct bitbuf bb;
char buf[4096];
int ret;
int bitmore;
uint64_t bitcount;
bitbuf_open(&bb, fdout);
while(1) {
ret = read(fdin, buf, sizeof(buf));
if(ret < 0) {
perror("htree_encode: read");
exit(-1);
}
if(ret == 0)
break;
for(int i=0; i<ret; i++) {
hcode_t hcode = &htree_encode_one(htree, buf[i])->hcode;
bitbuf_write(&bb, hcode->code, hcode->codelen);
}
}
bitmore = bitbuf_len(&bb);
bitcount = bb.io_count*8 + bitmore;
if(bitmore) {
bitbuf_write(&bb, 0, 32);
bitbuf_wsync(&bb);
}
return bitcount;
}
static void htree_decode(htree_t htree, int fdin, int fdout, uint64_t bitcount) {
struct bitbuf bb;
char buf[4096], *p;
uint32_t len;
int ret;
uint64_t pbitcount;
bitbuf_open(&bb, fdin);
pbitcount = 0;
while(pbitcount < bitcount) {
len = 0;
while(pbitcount < bitcount && len<sizeof(buf)) {
htree_node_t node = htree_decode_one(htree, &bb);
pbitcount += node->hcode.codelen;
buf[len++] = node->code;
}
p = buf;
while(len) {
ret = write(fdout, p, len);
if(ret<0) {
perror("htree_decode: write");
exit(-1);
}
p += ret;
len -= ret;
}
}
}
static void encode(int fdin, int fdout) {
struct hfile hfile;
struct htree htree;
int ret;
memset(&hfile, 0, sizeof(hfile));
memcpy(hfile.magic, MAGIC_STRING, MAGIC_SIZE);
hfile.version = VERSION_LE;
ret = lseek(fdin, 0, SEEK_SET);
if(ret < 0) {
perror("encode: lseek fdin");
exit(-1);
}
gen_weight_table(&hfile.weight_table, fdin);
gen_htree(&htree, &hfile.weight_table);
print_codes(&htree);
lseek(fdin, 0, SEEK_SET);
lseek(fdout, sizeof(struct hfile), SEEK_SET);
hfile.bitcount = htree_encode(&htree, fdin, fdout);
hfile.bitcount = htole64(hfile.bitcount);
lseek(fdout, 0, SEEK_SET);
ret = write(fdout, &hfile, sizeof(hfile));
if(ret<0) {
perror("encode: write hfile");
exit(-1);
}
if(ret != sizeof(hfile)) {
fprintf(stderr, "encode: hfile write failed\n");
exit(-1);
}
}
static void decode(int fdin, int fdout) {
struct hfile hfile;
struct htree htree;
int ret;
ret = read(fdin, &hfile, sizeof(hfile));
if(ret < 0) {
perror("encode: read &hfile");
exit(-1);
}
if(ret != sizeof(hfile)) {
fprintf(stderr, "decode: file too small\n");
exit(-1);
}
if(memcmp(hfile.magic, MAGIC_STRING, MAGIC_SIZE)) {
fprintf(stderr, "decode: bad magic\n");
exit(-1);
}
if(hfile.version != VERSION_LE) {
fprintf(stderr, "decode: bad version, expect %d but got %d\n",
le32toh(hfile.version), VERSION);
exit(-1);
}
hfile.bitcount = le64toh(hfile.bitcount);
gen_htree(&htree, &hfile.weight_table);
print_codes(&htree);
htree_decode(&htree, fdin, fdout, hfile.bitcount);
}
int main(int argc, char **argv) {
int fdin, fdout;
if(argc != 4) {
fprintf(stderr, "too few arguments\n");
exit(-1);
}
fdin = open(argv[2], O_RDONLY);
if(fdin<0) {
perror("main: open fdin");
exit(-1);
}
fdout = open(argv[3], O_WRONLY | O_CREAT | O_TRUNC, 0644);
if(fdout<0) {
perror("main: open fdout");
exit(-1);
}
if(*argv[1] == 'c') {
encode(fdin, fdout);
}
else if(*argv[1] == 'd') {
decode(fdin, fdout);
}
else {
fprintf(stderr, "unknown action %c", *argv[1]);
exit(-1);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment