Created
December 3, 2012 05:21
-
-
Save guojh/4192915 to your computer and use it in GitHub Desktop.
huffman encode/decode program
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define __BSD_SOURCE | |
#include <unistd.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <string.h> | |
#include <sys/stat.h> | |
#include <fcntl.h> | |
#include <endian.h> | |
#define CHAR_NUM 256 | |
#define MAX_CODELEN 32 | |
#define BITBUF_SIZE 4096 | |
#define MAGIC_STRING "#GUO'S_COMPRESS#" | |
#define MAGIC_SIZE 16 | |
#define VERSION 1 | |
#define VERSION_LE le32toh(VERSION) | |
typedef struct bitbuf { | |
int fd; | |
uint8_t buf[BITBUF_SIZE]; | |
int bittail, bithead; | |
uint64_t io_count; | |
} *bitbuf_t; | |
static void bitbuf_open(bitbuf_t bitbuf, int fd); | |
static int bitbuf_len(bitbuf_t bitbuf); | |
static int bitbuf_left(bitbuf_t bitbuf); | |
static int bitbuf_readone(bitbuf_t bitbuf); | |
static void bitbuf_wsync(bitbuf_t bitbut); | |
static void bitbuf_write(bitbuf_t bitbuf, uint32_t bits, int len); | |
typedef struct weight_table { | |
uint8_t weight[CHAR_NUM]; | |
} *weight_table_t; | |
static void gen_weight_table(weight_table_t wt, int fd); | |
typedef struct hcode { | |
uint32_t code; | |
int codelen; | |
} *hcode_t; | |
typedef struct htree_node *htree_node_t; | |
struct htree_node { | |
htree_node_t left, right, parent; | |
int weight; | |
int code; | |
int height; | |
struct hcode hcode; | |
}; | |
typedef struct htree { | |
htree_node_t root, nodes[CHAR_NUM]; | |
} *htree_t; | |
static void gen_htree(htree_t htree, weight_table_t wt); | |
static htree_node_t htree_encode_one(htree_t htree, char c); | |
static htree_node_t htree_decode_one(htree_t htree, bitbuf_t bitbuf); | |
static uint64_t htree_encode(htree_t htree, int fdin, int fdout); | |
static void htree_decode(htree_t htree, int fdin, int fdout, uint64_t bitcount); | |
struct hfile { | |
char magic[MAGIC_SIZE]; | |
uint32_t version; | |
uint32_t _pad1; | |
uint64_t bitcount; | |
struct weight_table weight_table; | |
char data[0]; | |
}; | |
static void encode(int fdin, int fdout); | |
static void decode(int fdin, int fdout); | |
static void bitbuf_open(bitbuf_t bb, int fd) { | |
bb->fd = fd; | |
memset(bb->buf, 0, sizeof(bb->buf)); | |
bb->bittail = bb->bithead = 0; | |
bb->io_count = 0; | |
} | |
static int bitbuf_len(bitbuf_t bb) { | |
return bb->bittail - bb->bithead; | |
} | |
static int bitbuf_left(bitbuf_t bb) { | |
return BITBUF_SIZE*8 - bitbuf_len(bb); | |
} | |
static int bitbuf_readone(bitbuf_t bb) { | |
if(bb->bittail == bb->bithead) { | |
int ret; | |
bb->bithead = 0; | |
ret = read(bb->fd, bb->buf, BITBUF_SIZE); | |
if(ret < 0) { | |
perror("butbuf_readone: read"); | |
exit(-1); | |
} | |
if(ret == 0) | |
return -1; | |
bb->bittail = ret*8; | |
} | |
int pos, blen, ret; | |
pos = bb->bithead / 8; | |
blen = bb->bithead % 8; | |
ret = (bb->buf[pos]>>blen)&1; | |
bb->bithead++; | |
return ret; | |
} | |
static void bitbuf_wsync(bitbuf_t bb) { | |
int bytes = bb->bittail/8; | |
int ret; | |
char *p; | |
p = bb->buf; | |
bb->io_count += bytes; | |
while(bytes) { | |
ret = write(bb->fd, p, bytes); | |
if(ret<0) { | |
perror("bitbuf_wsync: write"); | |
exit(-1); | |
} | |
p += ret; | |
bytes -= ret; | |
} | |
bb->bithead = 0; | |
if(bb->bittail % 8) | |
bb->buf[0] = bb->buf[bb->bittail/8]; | |
else | |
bb->buf[0] = 0; | |
bb->bittail %= 8; | |
memset(bb->buf+1, 0, sizeof(bb->buf)-1); | |
} | |
static void bitbuf_write(bitbuf_t bb, uint32_t bits, int len) { | |
int pos, blen, bleft, diff; | |
if(bitbuf_left(bb) < len) | |
bitbuf_wsync(bb); | |
pos = bb->bittail / 8; | |
blen = bb->bittail % 8; | |
bleft = 8 - blen; | |
if(bleft) { | |
bb->buf[pos] |= bits<<blen; | |
bits >>= bleft; | |
if(len<=bleft) { | |
bb->bittail += len; | |
return; | |
} else { | |
bb->bittail += bleft; | |
} | |
} | |
len -= bleft; | |
while(len) { | |
diff = len>8 ? 8 : len; | |
bb->buf[++pos] = bits; | |
bits >>= diff; | |
bb->bittail += diff; | |
len -= diff; | |
} | |
} | |
static void gen_weight_table(weight_table_t wt, int fd) { | |
char buf[4096]; | |
uint32_t lw[CHAR_NUM], maxw; | |
double devide; | |
int ret; | |
for(int i=0; i<CHAR_NUM; i++) | |
lw[i] = 0; | |
while(1) { | |
ret = read(fd, buf, sizeof(buf)); | |
if(ret<0) { | |
perror("gen_weight_table: read"); | |
exit(-1); | |
} | |
if(ret == 0) | |
break; | |
for(int i=0; i<ret; i++) | |
lw[(uint8_t)buf[i]]++; | |
} | |
maxw = 0; | |
for(int i=0; i<CHAR_NUM; i++) | |
if(lw[i] > maxw) | |
maxw = lw[i]; | |
devide = 1; | |
if(maxw>250) | |
devide = maxw/250; | |
for(int i=0; i<CHAR_NUM; i++) { | |
wt->weight[i] = lw[i]/devide; | |
if(lw[i] && !wt->weight[i]) | |
wt->weight[i] = 1; | |
} | |
} | |
static void hnode_shift(htree_node_t hnode, int isright) { | |
if(hnode->left) { | |
hnode_shift(hnode->left, isright); | |
hnode_shift(hnode->right, isright); | |
} else { | |
hnode->hcode.code <<= 1; | |
hnode->hcode.code |= isright; | |
hnode->hcode.codelen++; | |
} | |
} | |
static void gen_htree(htree_t htree, weight_table_t wt) { | |
htree_node_t nodes[CHAR_NUM], node; | |
int len; | |
int maxcodelen; | |
len = 0; | |
for(int i=0; i<CHAR_NUM; i++) { | |
if(wt->weight[i]) { | |
node = malloc(sizeof(struct htree_node)); | |
node->left = NULL; | |
node->right = NULL; | |
node->parent = NULL; | |
node->weight = wt->weight[i]; | |
node->code = i; | |
node->hcode.code = 0; | |
node->hcode.codelen = 0; | |
node->height = 0; | |
nodes[len++] = node; | |
htree->nodes[i] = node; | |
} else | |
htree->nodes[i] = NULL; | |
} | |
while(len > 1) { | |
int tmp; | |
tmp = len-1; | |
maxcodelen = MAX_CODELEN-1; | |
while(tmp>>=1) | |
maxcodelen--; | |
for(int i=0; i<len; i++) | |
for(int j=i+1; j<len; j++) | |
if(nodes[j]->height > maxcodelen || | |
nodes[i]->weight < nodes[j]->weight || | |
nodes[i]->weight == nodes[j]->weight && // make the result unique | |
nodes[i]->code > nodes[j]->code) { | |
htree_node_t tmp; | |
tmp = nodes[i]; | |
nodes[i] = nodes[j]; | |
nodes[j] = tmp; | |
} | |
htree_node_t tl, tr, tn; | |
tl = nodes[len-2]; | |
tr = nodes[len-1]; | |
hnode_shift(tl, 0); | |
hnode_shift(tr, 1); | |
tn = malloc(sizeof(struct htree_node)); | |
tn->left = tl; | |
tn->right = tr; | |
tl->parent = tn; | |
tr->parent = tn; | |
tn->weight = tl->weight + tr->weight; | |
tn->code = tl->code < tr->code ? tl->code : tr->code; | |
tn->height = 1 + ( tl->height > tr->height ? tl->height : tr->height ); | |
len--; | |
nodes[len-1] = tn; | |
} | |
htree->root = nodes[0]; | |
} | |
static void print_codes(htree_t htree) { | |
int i, j; | |
htree_node_t node; | |
int s=0, w=0; | |
double comp; | |
putchar('\n'); | |
for(i=0; i<32; i++) { | |
for(j=0; j<8; j++) { | |
node = htree->nodes[i*8+j]; | |
if(node) { | |
printf(" [%3d:%2d] ", node->weight, node->hcode.codelen); | |
w += node->weight; | |
s += node->weight * node->hcode.codelen; | |
} | |
else | |
printf(" NULL "); | |
} | |
putchar('\n'); | |
} | |
comp = (double)s/w; | |
printf("average code-len: %6.2lf\n", comp); | |
printf("compressed: %6.2lf%%\n", (1-comp/8)*100); | |
} | |
static htree_node_t htree_encode_one(htree_t htree, char c) { | |
return htree->nodes[(uint8_t)c]; | |
} | |
static htree_node_t htree_decode_one(htree_t htree, bitbuf_t bitbuf) { | |
int bit; | |
htree_node_t node; | |
node = htree->root; | |
while(1) { | |
if(node->left) { | |
bit = bitbuf_readone(bitbuf); | |
if(bit<0) | |
return NULL; | |
if(!bit) | |
node = node->left; | |
else | |
node = node->right; | |
} else | |
return node; | |
} | |
} | |
static uint64_t htree_encode(htree_t htree, int fdin, int fdout) { | |
struct bitbuf bb; | |
char buf[4096]; | |
int ret; | |
int bitmore; | |
uint64_t bitcount; | |
bitbuf_open(&bb, fdout); | |
while(1) { | |
ret = read(fdin, buf, sizeof(buf)); | |
if(ret < 0) { | |
perror("htree_encode: read"); | |
exit(-1); | |
} | |
if(ret == 0) | |
break; | |
for(int i=0; i<ret; i++) { | |
hcode_t hcode = &htree_encode_one(htree, buf[i])->hcode; | |
bitbuf_write(&bb, hcode->code, hcode->codelen); | |
} | |
} | |
bitmore = bitbuf_len(&bb); | |
bitcount = bb.io_count*8 + bitmore; | |
if(bitmore) { | |
bitbuf_write(&bb, 0, 32); | |
bitbuf_wsync(&bb); | |
} | |
return bitcount; | |
} | |
static void htree_decode(htree_t htree, int fdin, int fdout, uint64_t bitcount) { | |
struct bitbuf bb; | |
char buf[4096], *p; | |
uint32_t len; | |
int ret; | |
uint64_t pbitcount; | |
bitbuf_open(&bb, fdin); | |
pbitcount = 0; | |
while(pbitcount < bitcount) { | |
len = 0; | |
while(pbitcount < bitcount && len<sizeof(buf)) { | |
htree_node_t node = htree_decode_one(htree, &bb); | |
pbitcount += node->hcode.codelen; | |
buf[len++] = node->code; | |
} | |
p = buf; | |
while(len) { | |
ret = write(fdout, p, len); | |
if(ret<0) { | |
perror("htree_decode: write"); | |
exit(-1); | |
} | |
p += ret; | |
len -= ret; | |
} | |
} | |
} | |
static void encode(int fdin, int fdout) { | |
struct hfile hfile; | |
struct htree htree; | |
int ret; | |
memset(&hfile, 0, sizeof(hfile)); | |
memcpy(hfile.magic, MAGIC_STRING, MAGIC_SIZE); | |
hfile.version = VERSION_LE; | |
ret = lseek(fdin, 0, SEEK_SET); | |
if(ret < 0) { | |
perror("encode: lseek fdin"); | |
exit(-1); | |
} | |
gen_weight_table(&hfile.weight_table, fdin); | |
gen_htree(&htree, &hfile.weight_table); | |
print_codes(&htree); | |
lseek(fdin, 0, SEEK_SET); | |
lseek(fdout, sizeof(struct hfile), SEEK_SET); | |
hfile.bitcount = htree_encode(&htree, fdin, fdout); | |
hfile.bitcount = htole64(hfile.bitcount); | |
lseek(fdout, 0, SEEK_SET); | |
ret = write(fdout, &hfile, sizeof(hfile)); | |
if(ret<0) { | |
perror("encode: write hfile"); | |
exit(-1); | |
} | |
if(ret != sizeof(hfile)) { | |
fprintf(stderr, "encode: hfile write failed\n"); | |
exit(-1); | |
} | |
} | |
static void decode(int fdin, int fdout) { | |
struct hfile hfile; | |
struct htree htree; | |
int ret; | |
ret = read(fdin, &hfile, sizeof(hfile)); | |
if(ret < 0) { | |
perror("encode: read &hfile"); | |
exit(-1); | |
} | |
if(ret != sizeof(hfile)) { | |
fprintf(stderr, "decode: file too small\n"); | |
exit(-1); | |
} | |
if(memcmp(hfile.magic, MAGIC_STRING, MAGIC_SIZE)) { | |
fprintf(stderr, "decode: bad magic\n"); | |
exit(-1); | |
} | |
if(hfile.version != VERSION_LE) { | |
fprintf(stderr, "decode: bad version, expect %d but got %d\n", | |
le32toh(hfile.version), VERSION); | |
exit(-1); | |
} | |
hfile.bitcount = le64toh(hfile.bitcount); | |
gen_htree(&htree, &hfile.weight_table); | |
print_codes(&htree); | |
htree_decode(&htree, fdin, fdout, hfile.bitcount); | |
} | |
int main(int argc, char **argv) { | |
int fdin, fdout; | |
if(argc != 4) { | |
fprintf(stderr, "too few arguments\n"); | |
exit(-1); | |
} | |
fdin = open(argv[2], O_RDONLY); | |
if(fdin<0) { | |
perror("main: open fdin"); | |
exit(-1); | |
} | |
fdout = open(argv[3], O_WRONLY | O_CREAT | O_TRUNC, 0644); | |
if(fdout<0) { | |
perror("main: open fdout"); | |
exit(-1); | |
} | |
if(*argv[1] == 'c') { | |
encode(fdin, fdout); | |
} | |
else if(*argv[1] == 'd') { | |
decode(fdin, fdout); | |
} | |
else { | |
fprintf(stderr, "unknown action %c", *argv[1]); | |
exit(-1); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment