-
-
Save edg-l/13adde550ff81c038c5593b743156c8c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![allow(dead_code)] | |
const EOF_SYMBOL: usize = 256; | |
const MAX_SYMBOLS: usize = EOF_SYMBOL + 1; | |
const MAX_NODES: usize = MAX_SYMBOLS * 2 - 1; | |
const LUT_BITS: usize = 10; | |
const LUT_SIZE: usize = 1 << LUT_BITS; | |
const LUT_MASK: usize = LUT_SIZE - 1; | |
#[rustfmt::skip] | |
static FREQUENCY_TABLE: [u32; MAX_SYMBOLS] = [ | |
1 << 30, 4545, 2657, 431, 1950, 919, 444, 482, 2244, 617, 838, 542, 715, 1814, 304, 240, 754, 212, 647, 186, | |
283, 131, 146, 166, 543, 164, 167, 136, 179, 859, 363, 113, 157, 154, 204, 108, 137, 180, 202, 176, | |
872, 404, 168, 134, 151, 111, 113, 109, 120, 126, 129, 100, 41, 20, 16, 22, 18, 18, 17, 19, | |
16, 37, 13, 21, 362, 166, 99, 78, 95, 88, 81, 70, 83, 284, 91, 187, 77, 68, 52, 68, | |
59, 66, 61, 638, 71, 157, 50, 46, 69, 43, 11, 24, 13, 19, 10, 12, 12, 20, 14, 9, | |
20, 20, 10, 10, 15, 15, 12, 12, 7, 19, 15, 14, 13, 18, 35, 19, 17, 14, 8, 5, | |
15, 17, 9, 15, 14, 18, 8, 10, 2173, 134, 157, 68, 188, 60, 170, 60, 194, 62, 175, 71, | |
148, 67, 167, 78, 211, 67, 156, 69, 1674, 90, 174, 53, 147, 89, 181, 51, 174, 63, 163, 80, | |
167, 94, 128, 122, 223, 153, 218, 77, 200, 110, 190, 73, 174, 69, 145, 66, 277, 143, 141, 60, | |
136, 53, 180, 57, 142, 57, 158, 61, 166, 112, 152, 92, 26, 22, 21, 28, 20, 26, 30, 21, | |
32, 27, 20, 17, 23, 21, 30, 22, 22, 21, 27, 25, 17, 27, 23, 18, 39, 26, 15, 21, | |
12, 18, 18, 27, 20, 18, 15, 19, 11, 17, 33, 12, 18, 15, 19, 18, 16, 26, 17, 18, | |
9, 10, 25, 22, 22, 17, 20, 16, 6, 16, 15, 20, 14, 18, 24, 335, 1517 | |
]; | |
#[derive(Debug, Clone)] | |
struct Node { | |
pub bits: u32, | |
pub num_bits: u32, | |
pub leafs: [u16; 2], | |
pub symbol: u16, | |
} | |
#[derive(Debug, Clone, Copy)] | |
struct ConstructNode { | |
pub node_id: u16, | |
pub frequency: u32, | |
} | |
pub struct Huffman { | |
nodes: [Node; MAX_NODES], | |
root_idx: usize, | |
} | |
impl Huffman { | |
pub fn new(frequencies: &[u32; MAX_SYMBOLS]) -> Self { | |
let mut nodes: [Node; MAX_NODES] = std::array::from_fn(|_| Node { | |
bits: 0, | |
num_bits: 0, | |
leafs: [0, 0], | |
symbol: 0, | |
}); | |
let mut nodes_left: [ConstructNode; MAX_SYMBOLS] = std::array::from_fn(|_| ConstructNode { | |
frequency: 0, | |
node_id: 0, | |
}); | |
for i in 0..MAX_SYMBOLS { | |
nodes[i].num_bits = 0xFFFF_FFFF; | |
nodes[i].symbol = i as u16; | |
nodes[i].leafs = [0xFFFF, 0xFFFF]; | |
if i == EOF_SYMBOL { | |
nodes_left[i].frequency = 1; | |
} else { | |
nodes_left[i].frequency = frequencies[i]; | |
} | |
nodes_left[i].node_id = i as u16; | |
} | |
/* | |
let mut queue: BinaryHeap<ConstructNode> = | |
nodes.iter().take(MAX_SYMBOLS).map(|x| x.into()).collect(); | |
*/ | |
let mut num_nodes = MAX_SYMBOLS; | |
let mut num_nodes_left = MAX_SYMBOLS; | |
while num_nodes_left > 1 { | |
nodes_left[..num_nodes_left].sort_by(|a, b| b.frequency.cmp(&a.frequency)); | |
nodes[num_nodes].num_bits = 0; | |
nodes[num_nodes].leafs[0] = nodes_left[num_nodes_left - 1].node_id; | |
nodes[num_nodes].leafs[1] = nodes_left[num_nodes_left - 2].node_id; | |
nodes_left[num_nodes_left - 2].node_id = num_nodes as u16; | |
nodes_left[num_nodes_left - 2].frequency = nodes_left[num_nodes_left - 1].frequency + nodes_left[num_nodes_left - 2].frequency; | |
num_nodes += 1; | |
num_nodes_left -= 1; | |
} | |
let root_idx = num_nodes - 1; | |
let mut huffman = Self { nodes, root_idx }; | |
huffman.set_bits(root_idx, 0, 0); | |
huffman | |
} | |
/// Sets the bits | |
fn set_bits(&mut self, node_index: usize, bits: u32, depth: u32) { | |
if self.nodes[node_index].leafs[1] != 0xFFFF { | |
self.set_bits(self.nodes[node_index].leafs[1] as usize, bits | (1 << depth), depth + 1); | |
} | |
if self.nodes[node_index].leafs[0] != 0xFFFF { | |
self.set_bits(self.nodes[node_index].leafs[0] as usize, bits, depth + 1); | |
} | |
if self.nodes[node_index].num_bits > 0 { | |
self.nodes[node_index].bits = bits; | |
self.nodes[node_index].num_bits = depth; | |
} | |
} | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
#[test] | |
fn construct_tree() { | |
let huffman = Huffman::new(&FREQUENCY_TABLE); | |
for (i, node) in huffman.nodes.iter().enumerate().take(MAX_SYMBOLS) { | |
println!("{:02x}: {:b}", i, node.bits); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment