Created
February 12, 2022 19:43
-
-
Save nauful/4cce9552badbe648c60dac04af67e5fe to your computer and use it in GitHub Desktop.
Implemented paper: "On the Implementation of Minimum Redundancy Prefix Codes, Moffat and Turpin 1997"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
void test_huf(FileReader *reader) { | |
const int FAST_BITS = 7; | |
const int MAX_BITS = 11; | |
uint32 counts[0x100] = { 0 }; | |
byte buf[1 << 12]; | |
uint32 n = 0; | |
reader->Rewind(); | |
while ((n = reader->Read(buf, sizeof(buf))) > 0) { | |
for (uint32 i = 0; i < n; i++) { | |
++counts[buf[i]]; | |
} | |
} | |
uint32 tree[0x200], tmp_tree[0x100]; | |
int num_syms = 0; | |
for (int y = 0; y < 0x100; y++) { | |
if (!counts[y]) { | |
continue; | |
} | |
tree[num_syms++] = (counts[y] << 8) + y; | |
} | |
for (int b = 0, radix_mask = 1; b < 32; b++, radix_mask <<= 1) { | |
int cnt[2] = { 0 }; | |
for (int y = 0; y < num_syms; y++) { | |
int slot = (tree[y] & radix_mask) != 0; | |
cnt[slot]++; | |
} | |
cnt[1] = cnt[0]; | |
cnt[0] = 0; | |
for (int y = 0; y < num_syms; y++) { | |
int slot = (tree[y] & radix_mask) != 0; | |
tmp_tree[cnt[slot]++] = tree[y]; | |
} | |
memcpy(tree, tmp_tree, sizeof(uint32) * num_syms); | |
} | |
while (true) { | |
int r0 = 0, r1 = num_syms + 1; | |
tree[r1] = -1; | |
uint16 left[0x200], right[0x200]; | |
for (int i = num_syms + 1; i < 2 * num_syms; i++) { | |
tree[i] = -1; | |
left[i] = (tree[r0] < tree[r1]) ? r0++ : r1++; | |
right[i] = (tree[r0] < tree[r1]) ? r0++ : r1++; | |
tree[i] = ((tree[left[i]] & 0xFFFFFF00) + (tree[right[i]] & 0xFFFFFF00)) | 0xFF; | |
} | |
byte tree_lens[0x200]; | |
tree_lens[2 * num_syms - 1] = 0; | |
for (int i = 2 * num_syms - 1; i > num_syms; --i) { | |
tree_lens[left[i]] = tree_lens[i] + 1; | |
tree_lens[right[i]] = tree_lens[i] + 1; | |
} | |
byte max_bits = 0; | |
for (int i = 0; i < num_syms; i++) { | |
max_bits = Max(tree_lens[i], max_bits); | |
} | |
if (max_bits <= MAX_BITS) { | |
for (int i = 0; i < num_syms; i++) { | |
tree[i] = (tree[i] & 0xFF) | (tree_lens[i] << 8); | |
} | |
break; | |
} | |
uint32 shift = max_bits - MAX_BITS; | |
for (int i = 0; i < num_syms; i++) { | |
uint32 cnt = tree[i] >> 8; | |
cnt >>= shift; | |
cnt += cnt == 0; | |
tree[i] = (tree[i] & 0xFF) | (cnt << 8); | |
} | |
} | |
byte max_bits = 0; | |
byte len_count[0x100] = { 0 }; | |
for (int i = 0; i < num_syms; i++) { | |
++len_count[tree[i] >> 8]; | |
max_bits = Max(byte(tree[i] >> 8), max_bits); | |
} | |
int left = 1; | |
uint16 cur_code = 0, next_code[16], table_base[17], tree_base[16]; | |
table_base[0] = 0; | |
for (byte bits = 1; bits <= max_bits; bits++) { | |
left <<= 1; | |
left -= len_count[bits]; | |
ASSERT(left >= 0); | |
cur_code += len_count[bits - 1]; | |
cur_code <<= 1; | |
next_code[bits] = cur_code; | |
table_base[bits] = cur_code << (MAX_BITS - bits); | |
} | |
for (byte bits = max_bits + 1; bits < 17; bits++) { | |
table_base[bits] = 1 << MAX_BITS; | |
} | |
for (int i0 = 0, i1 = num_syms - 1; i0 < num_syms && i0 < i1; i0++, i1--) { | |
uint32 tmp = tree[i0]; | |
tree[i0] = tree[i1]; | |
tree[i1] = tmp; | |
} | |
uint16 sym_codes[0x100] = { 0 }; | |
byte sym_lens[0x100]; | |
for (int i = 0; i < num_syms; i++) { | |
sym_codes[tree[i] & 0xFF] = next_code[tree[i] >> 8]++; | |
sym_lens[tree[i] & 0xFF] = tree[i] >> 8; | |
} | |
uint16 running_tree_base = 0; | |
for (byte bits = 0; bits <= max_bits; bits++) { | |
tree_base[bits] = running_tree_base; | |
running_tree_base += len_count[bits]; | |
} | |
uint16 fast_table[1 << FAST_BITS], fast_off = 0; | |
for (byte bits = 1; bits <= max_bits && bits <= FAST_BITS; ++bits) { | |
uint32 num_entries = len_count[bits]; | |
uint32 span = 1 << (FAST_BITS - bits); | |
for (uint32 i = 0; i < num_entries; i++) { | |
uint16 y = tree_base[bits] + i; | |
uint32 entry = tree[y]; | |
for (uint32 j = 0; j < span; j++) { | |
fast_table[fast_off + j] = entry; | |
} | |
fast_off += span; | |
} | |
} | |
byte target[1 << MAX_BITS] = { 0 }; | |
for (uint16 cur = 0; cur < (1 << MAX_BITS); cur++) { | |
int len = 1; | |
while (len < max_bits && cur >= table_base[len + 1]) { | |
++len; | |
} | |
uint16 off = (cur - table_base[len]) >> (MAX_BITS - len); | |
uint16 y = tree_base[len] + off; | |
byte sym = tree[y] & 0xFF; | |
ASSERT(len == sym_lens[sym]); | |
ASSERT((cur >> (MAX_BITS - len)) == sym_codes[sym]); | |
target[cur] = sym; | |
if (cur < table_base[FAST_BITS + 1]) { | |
uint16 fast_cur = cur >> (MAX_BITS - FAST_BITS); | |
uint16 fast_entry = fast_table[fast_cur]; | |
ASSERT(tree[y] == fast_entry); | |
} | |
} | |
for (int y = 0; y < 0x100; y++) { | |
if (counts[y] > 0) { | |
int ct = 0; | |
for (int x = 0; x < (1 << MAX_BITS); x++) { | |
ct += target[x] == y; | |
} | |
ASSERT(ct > 0); | |
} | |
} | |
reader->Rewind(); | |
uint32 total_bytes = 0; | |
while ((n = reader->Read(buf, sizeof(buf))) > 0) { | |
for (uint32 i = 0; i < n; i++) { | |
ASSERT(sym_lens[buf[i]] > 0); | |
total_bytes += sym_lens[buf[i]]; | |
} | |
} | |
total_bytes = (total_bytes + 7) >> 3; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment