Skip to content

Instantly share code, notes, and snippets.

@nauful
Created February 12, 2022 19:43
Show Gist options
  • Save nauful/4cce9552badbe648c60dac04af67e5fe to your computer and use it in GitHub Desktop.
Save nauful/4cce9552badbe648c60dac04af67e5fe to your computer and use it in GitHub Desktop.
Implemented paper: "On the Implementation of Minimum Redundancy Prefix Codes, Moffat and Turpin 1997"
void test_huf(FileReader *reader) {
const int FAST_BITS = 7;
const int MAX_BITS = 11;
uint32 counts[0x100] = { 0 };
byte buf[1 << 12];
uint32 n = 0;
reader->Rewind();
while ((n = reader->Read(buf, sizeof(buf))) > 0) {
for (uint32 i = 0; i < n; i++) {
++counts[buf[i]];
}
}
uint32 tree[0x200], tmp_tree[0x100];
int num_syms = 0;
for (int y = 0; y < 0x100; y++) {
if (!counts[y]) {
continue;
}
tree[num_syms++] = (counts[y] << 8) + y;
}
for (int b = 0, radix_mask = 1; b < 32; b++, radix_mask <<= 1) {
int cnt[2] = { 0 };
for (int y = 0; y < num_syms; y++) {
int slot = (tree[y] & radix_mask) != 0;
cnt[slot]++;
}
cnt[1] = cnt[0];
cnt[0] = 0;
for (int y = 0; y < num_syms; y++) {
int slot = (tree[y] & radix_mask) != 0;
tmp_tree[cnt[slot]++] = tree[y];
}
memcpy(tree, tmp_tree, sizeof(uint32) * num_syms);
}
while (true) {
int r0 = 0, r1 = num_syms + 1;
tree[r1] = -1;
uint16 left[0x200], right[0x200];
for (int i = num_syms + 1; i < 2 * num_syms; i++) {
tree[i] = -1;
left[i] = (tree[r0] < tree[r1]) ? r0++ : r1++;
right[i] = (tree[r0] < tree[r1]) ? r0++ : r1++;
tree[i] = ((tree[left[i]] & 0xFFFFFF00) + (tree[right[i]] & 0xFFFFFF00)) | 0xFF;
}
byte tree_lens[0x200];
tree_lens[2 * num_syms - 1] = 0;
for (int i = 2 * num_syms - 1; i > num_syms; --i) {
tree_lens[left[i]] = tree_lens[i] + 1;
tree_lens[right[i]] = tree_lens[i] + 1;
}
byte max_bits = 0;
for (int i = 0; i < num_syms; i++) {
max_bits = Max(tree_lens[i], max_bits);
}
if (max_bits <= MAX_BITS) {
for (int i = 0; i < num_syms; i++) {
tree[i] = (tree[i] & 0xFF) | (tree_lens[i] << 8);
}
break;
}
uint32 shift = max_bits - MAX_BITS;
for (int i = 0; i < num_syms; i++) {
uint32 cnt = tree[i] >> 8;
cnt >>= shift;
cnt += cnt == 0;
tree[i] = (tree[i] & 0xFF) | (cnt << 8);
}
}
byte max_bits = 0;
byte len_count[0x100] = { 0 };
for (int i = 0; i < num_syms; i++) {
++len_count[tree[i] >> 8];
max_bits = Max(byte(tree[i] >> 8), max_bits);
}
int left = 1;
uint16 cur_code = 0, next_code[16], table_base[17], tree_base[16];
table_base[0] = 0;
for (byte bits = 1; bits <= max_bits; bits++) {
left <<= 1;
left -= len_count[bits];
ASSERT(left >= 0);
cur_code += len_count[bits - 1];
cur_code <<= 1;
next_code[bits] = cur_code;
table_base[bits] = cur_code << (MAX_BITS - bits);
}
for (byte bits = max_bits + 1; bits < 17; bits++) {
table_base[bits] = 1 << MAX_BITS;
}
for (int i0 = 0, i1 = num_syms - 1; i0 < num_syms && i0 < i1; i0++, i1--) {
uint32 tmp = tree[i0];
tree[i0] = tree[i1];
tree[i1] = tmp;
}
uint16 sym_codes[0x100] = { 0 };
byte sym_lens[0x100];
for (int i = 0; i < num_syms; i++) {
sym_codes[tree[i] & 0xFF] = next_code[tree[i] >> 8]++;
sym_lens[tree[i] & 0xFF] = tree[i] >> 8;
}
uint16 running_tree_base = 0;
for (byte bits = 0; bits <= max_bits; bits++) {
tree_base[bits] = running_tree_base;
running_tree_base += len_count[bits];
}
uint16 fast_table[1 << FAST_BITS], fast_off = 0;
for (byte bits = 1; bits <= max_bits && bits <= FAST_BITS; ++bits) {
uint32 num_entries = len_count[bits];
uint32 span = 1 << (FAST_BITS - bits);
for (uint32 i = 0; i < num_entries; i++) {
uint16 y = tree_base[bits] + i;
uint32 entry = tree[y];
for (uint32 j = 0; j < span; j++) {
fast_table[fast_off + j] = entry;
}
fast_off += span;
}
}
byte target[1 << MAX_BITS] = { 0 };
for (uint16 cur = 0; cur < (1 << MAX_BITS); cur++) {
int len = 1;
while (len < max_bits && cur >= table_base[len + 1]) {
++len;
}
uint16 off = (cur - table_base[len]) >> (MAX_BITS - len);
uint16 y = tree_base[len] + off;
byte sym = tree[y] & 0xFF;
ASSERT(len == sym_lens[sym]);
ASSERT((cur >> (MAX_BITS - len)) == sym_codes[sym]);
target[cur] = sym;
if (cur < table_base[FAST_BITS + 1]) {
uint16 fast_cur = cur >> (MAX_BITS - FAST_BITS);
uint16 fast_entry = fast_table[fast_cur];
ASSERT(tree[y] == fast_entry);
}
}
for (int y = 0; y < 0x100; y++) {
if (counts[y] > 0) {
int ct = 0;
for (int x = 0; x < (1 << MAX_BITS); x++) {
ct += target[x] == y;
}
ASSERT(ct > 0);
}
}
reader->Rewind();
uint32 total_bytes = 0;
while ((n = reader->Read(buf, sizeof(buf))) > 0) {
for (uint32 i = 0; i < n; i++) {
ASSERT(sym_lens[buf[i]] > 0);
total_bytes += sym_lens[buf[i]];
}
}
total_bytes = (total_bytes + 7) >> 3;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment