Skip to content

Instantly share code, notes, and snippets.

@zhaoyao
Last active December 10, 2015 17:18
Show Gist options
  • Save zhaoyao/4467213 to your computer and use it in GitHub Desktop.
Save zhaoyao/4467213 to your computer and use it in GitHub Desktop.
Huffman Encoding
package snip;
import java.io.EOFException;
import java.util.Arrays;
import java.util.PriorityQueue;
/**
* User: zhaoyao
* Date: 1/5/13
* Time: 10:53 PM
*/
public class Huffman {
static class HuffmanCode {
int code;
int nbits;
HuffmanCode(int code, int nbits) {
this.code = code;
this.nbits = nbits;
}
@Override
public String toString() {
return "HuffmanCode{" +
"code=" + Integer.toBinaryString(code) +
", nbits=" + nbits +
'}';
}
}
static class HuffmanNode implements Comparable<HuffmanNode> {
byte sym;
int freq;
HuffmanNode left;
HuffmanNode right;
HuffmanNode(HuffmanNode left, HuffmanNode right) {
this.freq = left.freq + right.freq;
this.left = left;
this.right = right;
}
HuffmanNode(byte sym, int freq) {
this.sym = sym;
this.freq = freq;
}
@Override
public int compareTo(HuffmanNode huffmanNode) {
return this.freq - huffmanNode.freq;
}
public String toString() {
if (left == null && right == null) {
return "<" + sym + ":" + freq + ">";
} else {
return "<" + freq + ":(" + left + "), (" + right + ")>";
}
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
HuffmanNode node = (HuffmanNode) o;
if (left == null || right == null) {
return node.left == null || node.right == null;
}
return left.equals(node.left) && right.equals(node.right);
}
}
public static byte[] compress(byte[] data) throws Exception {
int[] freqs = new int[256]; // byte range (-127 - 128)
HuffmanCode[] table = new HuffmanCode[256];
int nbytes = data.length;
for (byte b : data) {
int p = b + 128; // transform the negative byte
freqs[p]++;
}
//在 huffman header中的freq table 需要使用一个byte存储符号的频率
//于是需要将所有的频率等比例缩放到0-255(UCHAR_MAX, 0xff)能容纳的大小
int max = 255;
for (int freq : freqs) {
if (freq > max) {
max = freq;
}
}
for (int i = 0; i < freqs.length; i++) {
int freq = freqs[i];
int scale = (int) (freq / ((double) max / (double) 255));
if (scale == 0 && freq != 0) {
freqs[i] = 1;
} else {
freqs[i] = scale;
}
}
HuffmanNode root = buildTree(freqs);
buildTable(root, table, 0, (short) 0);
byte[] compressed = new byte[260 + data.length]; //4bytes length + 256 bytes freq table
int offset = 0;
compressed[offset++] = (byte) ((nbytes >> 24) & 0xff);
compressed[offset++] = (byte) ((nbytes >> 16) & 0xff);
compressed[offset++] = (byte) ((nbytes >> 8) & 0xff);
compressed[offset++] = (byte) ((nbytes) & 0xff);
for (int freq : freqs) {
compressed[offset++] = (byte) freq;
}
int bitsWrite = offset * 8;
for (byte b : data) {
int c = b + 128;
if (table[c] == null) {
throw new IllegalStateException("huffman table " + c + " is null");
}
int code = table[c].code;
for (int i = 0; i < table[c].nbits; i++) {
int nb = bitsWrite / 8;
//huffman code从int的低bit开始
int k = Integer.SIZE - table[c].nbits;
boolean flag = bitGet(code, k + i);
compressed[nb] = bitSet(compressed[nb], flag, bitsWrite % 8);
bitsWrite++;
}
}
int compressedLen = bitsWrite % 8 == 0 ? bitsWrite / 8 : bitsWrite / 8 + 1;
return Arrays.copyOfRange(compressed, 0, compressedLen);
}
public static byte[] decompress(byte[] compressed) throws EOFException {
int offset = 0;
int originalLen = (compressed[offset++] & 0xff) << 24 |
(compressed[offset++] & 0xff) << 16 |
(compressed[offset++] & 0xff) << 8 |
(compressed[offset++] & 0xff);
int[] freqs = new int[256];
for (int i = 0; i < freqs.length; i++) {
freqs[i] = compressed[offset++] & 0xff;
}
HuffmanNode root = buildTree(freqs);
byte[] result = new byte[originalLen];
int bitsRead = offset * 8;
int i = 0;
while (i < originalLen) {
HuffmanNode node = root;
for (; ; ) {
if (bitsRead / 8 >= compressed.length) {
throw new EOFException();
}
byte b = compressed[bitsRead / 8];
if (bitGet(b, 24 + bitsRead % 8)) { //code只占用最后8位,所以从24位开始算起
node = node.right;
} else {
node = node.left;
}
bitsRead++;
if (node.left == null && node.right == null) {
result[i++] = node.sym;
break;
}
}
}
return result;
}
private static boolean bitGet(int val, int p) {
int mask = 0x80000000;
while (p-- > 0) {
mask >>>= 1;
}
return (val & mask) == mask;
}
private static byte bitSet(byte val, boolean flag, int i) {
int mask = 0x80;
if (i >= Byte.SIZE) {
throw new IllegalArgumentException("bit offset overflow: " + i);
}
while (i-- > 0) {
mask >>>= 1;
}
return (byte) (flag ? val | mask : val & (~mask));
}
private static void buildTable(HuffmanNode node, HuffmanCode[] table, int code, int nbits) {
if (node.left != null) {
buildTable(node.left, table, code << 1, (nbits + 1));
}
if (node.right != null) {
buildTable(node.right, table, code << 1 | 0x0001, (nbits + 1));
}
if (node.left == null && node.right == null) {
table[node.sym + 128] = new HuffmanCode(code, nbits);
}
}
private static HuffmanNode buildTree(int[] freqs) {
PriorityQueue<HuffmanNode> pq = new PriorityQueue<HuffmanNode>();
for (int i = 0; i < freqs.length; i++) {
int freq = freqs[i];
if (freq != 0) {
pq.add(new HuffmanNode((byte) (i - 128), freq));
}
}
while (!pq.isEmpty()) {
if (pq.size() >= 2) {
HuffmanNode left = pq.poll();
HuffmanNode right = pq.poll();
pq.add(new HuffmanNode(left, right));
} else {
break;
}
}
if (pq.size() != 1) {
throw new IllegalStateException("Unexpected huffman tree root: " + pq.size());
}
return pq.poll();
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment