Skip to content

Instantly share code, notes, and snippets.

@snarkbait
Created August 9, 2015 00:45
Show Gist options
  • Save snarkbait/c939953337ad74d1ab04 to your computer and use it in GitHub Desktop.
Save snarkbait/c939953337ad74d1ab04 to your computer and use it in GitHub Desktop.
Huffman Tree example
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
import java.util.Arrays;
/**
* BitStream class
* for reading and writing variable lengths of bits to a byte array
* @author /u/Philboyd_Studge
*/
public class BitStream {
private final int DEFAULT_SIZE = 256;
// byte array for actual BitStream
private byte[] bank;
// boolean array representation of a single byte
private boolean[] bits = new boolean[8];
// current position in bit array
private int bitPosition;
// current position in byte array
private int bytePosition;
// are we at the end of the bank
private boolean endOfBank = false;
// number of leftover bits short of a full byte
private byte padBits;
private boolean closed = false;
/**
* Creates a new BitStream of DEFAULT_SIZE
*/
public BitStream() {
bank = new byte[DEFAULT_SIZE];
}
/**
* Creates a new BitStream of given size
* @param size size of BitStream
*/
public BitStream(int size) {
bank = new byte[size];
}
/**
* Creates a new BitStream from a byte array
* assumes last byte is padBits
* @param bank byte array of encoded bits
*/
public BitStream(byte[] bank)
{
this.bank = bank;
this.padBits = bank[bank.length - 1];
pullByte();
}
/**
* Get the byte array
* @return byte array of BitStream
*/
public byte[] getBank()
{
return bank;
}
/**
* grow byte array by DEFAULT_SIZE
* @return byte array grown by DEFAULT_SIZE
*/
private byte[] grow()
{
return Arrays.copyOf(bank, bank.length + DEFAULT_SIZE);
}
/**
* size of array
* @return size of array
*/
public int length()
{
return bank.length;
}
/**
* returns EOB() or EndOfBank flag
* @return true if end of bank
*/
public boolean EOB()
{
return endOfBank;
}
/**
* add a single bit to the BitStream
*
* @param bit <code>true</code> for 1, <code>false</code> for 0
*/
public void pushBit(boolean bit)
{
if (closed) return;
bits[bitPosition] = bit;
bitPosition++;
if (bitPosition > 7) {
flush();
}
}
/**
* flush the full byte to the BitStream
*/
private void flush() {
int makeByte = 0;
for (int i = 0; i < 8; i++) {
// push 1's to the correct bit position
if (bits[i]) makeByte |= 1 << 7 - i;
}
bank[bytePosition] = (byte) (makeByte & 0xff);
bytePosition++;
if (bytePosition >= bank.length) bank = grow();
bitPosition = 0;
clearBits();
}
/**
* set all bits in the current bit[] array to 0
*/
private void clearBits() {
for (int i = 0; i < bits.length; i++) {
bits[i] = false;
}
}
/**
* Push bits to BitStream
* length will be based on highest one-bit in integer
* @param inBits integer value to push
*/
public void pushBits(int inBits) {
int len = 31 - Integer.numberOfLeadingZeros(inBits);
while(len >= 0) {
pushBit((inBits & (1 << len)) == 1 << len);
len--;
}
}
/**
* Push bits to BitStream
* pushes a set length of bits, padding with zeroes if necessary
* if value length is greater than length, will push entire value
* @param value integer value to push
* @param length number of bits to push
*/
public void pushBits(int value, int length)
{
for (int i = length - 1; i >= 0; i--)
{
pushBit((value >> i & 1) == 1);
}
}
/**
* Push bits to BitStream
* pushes a String representation of a binary number
* @param bitString
*/
public void pushBits(String bitString)
{
for (int i = 0; i < bitString.length(); i++)
{
pushBit(bitString.charAt(i) == '1');
}
}
/**
* pull a byte of the current byte position
* in the array
*/
private void pullByte()
{
clearBits();
byte current = bank[bytePosition];
for (int i = 7, j = 0; i >= 0; i--, j++)
{
int flagBit = 1 << i;
if ((current & flagBit) == flagBit) bits[j] = true;
}
bytePosition++;
if (bytePosition > bank.length - 1) endOfBank = true;
bitPosition = 0;
}
/**
* Returns a single bit at bitPosition
* @return bit boolean true for 1, false for 0
*/
public boolean readBit()
{
boolean bit = bits[bitPosition];
bitPosition++;
if (bytePosition == bank.length - 1 && bitPosition > 7 - padBits) endOfBank = true;
if (bitPosition > 7)
{
pullByte();
}
return bit;
}
/**
* read <code>length</code> number of bits from the stream
* @param length
* @return integer
*/
public int readBits(int length)
{
int retval = 0;
for (int i = length - 1; i >= 0; i--)
{
retval |= ((readBit()) ? 1 : 0) << i;
}
return retval;
}
/**
* close stream for writing operations.
* will pull the first byte for reading operations.
*/
public void close() {
if (closed) return;
closed = true;
padBits = (byte)((-(bitPosition) + 8) % 8);
if (padBits > 0)
{
flush();
}
bank[bytePosition] = padBits;
bank = Arrays.copyOfRange(bank, 0, bytePosition + 1);
bitPosition = 0;
bytePosition = 0;
pullByte();
}
@Override
public String toString()
{
if (bitPosition != 0 ) flush();
String retval = "";
for (int i = 0; i < bank.length; i++)
{
if (i % 24 == 0) retval += "\n";
retval += Integer.toHexString((int) (bank[i] & 0xff)) + " ";
}
return retval;
}
}
/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* Huffman Tree class
* for /r/javaexamples
*
* @author /u/Philboyd_Studge
*/
import java.util.Arrays;
public class HuffmanTree
{
// the Tree of Trees
private Tree htree;
// the bit codes and lengths
private int[] huffmanCodes = new int[256];
private int[] huffmanLengths = new int[256];
/**
* Create Huffman Tree for encoding
* and populates the code/length arrays
* based on a 256 element array of frequency counts
* @param frequencies integer array of frequency counts
*/
public HuffmanTree(int[] frequencies)
{
this.htree = getHuffmanTree(frequencies);
getCodes();
}
/**
* Create Huffman Tree for decoding
* from a BitStream of the encoded tree
* frequencies have been discarded
* @param bs BitStream of encoded tree
*/
public HuffmanTree(BitStream bs)
{
this.htree = getHuffmanTree(bs);
getCodes();
}
/**
* get Tree
* @return Tree Huffman Tree
*/
public Tree getTree()
{
return htree;
}
/**
* Encode a string to a huffman-encoded BitStream
* @param input String to encode
* @return output BitStream encoded stream
*/
public BitStream encode(String input)
{
BitStream output = new BitStream();
for (int i = 0; i < input.length() ;i++ )
{
output.pushBits(huffmanCodes[(int) input.charAt(i)], huffmanLengths[(int) input.charAt(i)]);
}
output.close();
return output;
}
/**
* Encode a byte array to a huffman-encoded BitStream
* @param input byte[] array to encode
* @return output BitStream encoded stream
*/
public BitStream encode(byte[] input)
{
BitStream output = new BitStream();
for (int i = 0; i < input.length; i++)
{
output.pushBits(huffmanCodes[input[i]], huffmanLengths[input[i]]);
}
output.close();
return output;
}
/**
* Decode a huffman-encoded BitStream to String
* @param input BitStream of encoded data
* @return output decoded String
*/
public String decode(BitStream input)
{
String output = "";
while (!input.EOB())
{
output += (char) htree.getCode(input);
}
return output;
}
/**
* Decode a huffman-encoded BitStream to byte array
* @param input BitStream of encoded data
* @return output byte array
*/
public byte[] decodeBytes(BitStream input)
{
byte[] output = new byte[input.length() * 4];
int counter = 0;
while (!input.EOB())
{
output[counter] = (byte) (htree.getCode(input) & 0xff);
counter++;
}
return Arrays.copyOfRange(output, 0, counter + 1);
}
/**
* Build Tree from frequency array
* Stores them in a Priority Queue
* pulls out two smallest and adds them together
* creates a new subtree
* @param frequencies integer array of frequencies
* @return Tree huffman tree
*/
private Tree getHuffmanTree(int[] frequencies)
{
BinaryHeap<Tree> heap = new BinaryHeap<>();
for (int i = 0; i < frequencies.length ; i++)
{
if (frequencies[i] > 0)
{
// add all frequencies > 0 as new subtree with no children
heap.add(new Tree(i, frequencies[i]));
}
}
while (heap.length() > 1)
{
Tree t1 = heap.remove();
Tree t2 = heap.remove();
heap.add(new Tree(t1, t2));
}
return heap.remove();
}
/**
* Re-build tree from BitStream
* frequencies were not stored but are
* now unimportant
* @param bs BitStream of encoded tree 1 bit + literal byte or 0
* @return Tree Huffman Tree
*/
public Tree getHuffmanTree(BitStream bs)
{
if (bs.readBit())
{
return new Tree(bs.readBits(8), 0);
}
else
{
Tree left = getHuffmanTree(bs);
Tree right = getHuffmanTree(bs);
return new Tree(left, right);
}
}
/**
* Build huffman codes based on current tree
*/
private void getCodes()
{
if (htree.root == null) return;
getCodes(htree.root);
}
/**
* recursive helper class
*/
private void getCodes(Node root)
{
if (!htree.isLeaf(root))
{
root.left.huffmanCode = root.huffmanCode << 1;
root.left.huffmanLen = root.huffmanLen + 1;
getCodes(root.left);
root.right.huffmanCode = root.huffmanCode << 1 ^ 1;
root.right.huffmanLen = root.huffmanLen + 1;
getCodes(root.right);
}
else
{
huffmanCodes[root.index] = root.huffmanCode;
huffmanLengths[root.index] = root.huffmanLen;
}
}
/**
* Show all non-zero codes
*
*/
public void displayCodes()
{
for (int i = 0; i < 256; i++)
{
if (huffmanLengths[i] > 0)
{
System.out.println(i + " : " + toBinaryString(huffmanCodes[i], huffmanLengths[i]));
}
}
}
/**
* Binary String method with padding/truncating to specified length
* @param value integer value to convert
* @param length integer length to convert, adding zeroes at the front if necessary
* @return retval String binary representation of value at specified length
*/
public static String toBinaryString(int value, int length)
{
String retval = "";
for (int i = length - 1; i >= 0; i--)
{
retval += ((value >> i & 1) == 1) ? "1" : "0";
}
return retval;
}
/**
* Tree class
*/
class Tree implements Comparable<Tree>
{
Node root;
/**
* Create tree with childless leaf node
* @param index integer of character/byte value
* @param frequency count of occuring frequency
*/
public Tree(int index, int frequency)
{
root = new Node(index, frequency);
}
/**
* Create subtree with null node as root
* and total of two subtree frequencies
* @param tree1 left subtree
* @param tree2 right subtree
*/
public Tree(Tree tree1, Tree tree2)
{
root = new Node();
root.left = tree1.root;
root.right = tree2.root;
root.frequency = tree1.root.frequency + tree2.root.frequency;
}
/**
* Encode Huffman Tree to BitStream
* if leaf node pushes 1 + literal byte
* otherwise 0
* @return bs BitStream with encoded tree
*/
public BitStream encodedTree()
{
BitStream bs = new BitStream();
encodedTree(root, bs);
bs.close();
//System.out.println(bs);
return bs;
}
/**
* recursive helper method
*/
private void encodedTree(Node node, BitStream bs)
{
if (isLeaf(node))
{
bs.pushBit(true);
bs.pushBits(node.index, 8);
}
else
{
bs.pushBit(false);
encodedTree(node.left, bs);
encodedTree(node.right, bs);
}
}
/**
* Get individual huffman code from current spot in tree
* recurses until leaf node found
*/
public int getCode(BitStream bs)
{
Node current = root;
boolean bit;
while (!isLeaf(current))
{
bit = bs.readBit();
if (bit) current = current.right;
else current = current.left;
}
return current.index;
}
/**
* is node a leaf node (childless)
* @param n Node to test
* @return true if node has no children
*/
public boolean isLeaf(Node n)
{
return n.left == null;
}
@Override
public int compareTo(Tree t)
{
return root.frequency - t.root.frequency;
}
}
/**
* Node Class
*
*/
class Node
{
// actual ascii character value
int index;
// count
int frequency;
// integer value of huffman code
int huffmanCode;
// legth of huffman code in bits
int huffmanLen;
// left child
Node left;
//right child
Node right;
/**
* Create blank Node
*/
public Node()
{
}
/**
* create Node based on index value and frequency count
*/
public Node(int index, int frequency)
{
this.index = index;
this.frequency = frequency;
}
@Override
public String toString()
{
return this.index + " : " + this.frequency;
}
}
}
import java.util.Arrays;
public class HuffmanTreeTest
{
public static int[] getFrequencies(String input)
{
int[] freq = new int[256];
for (int i = 0; i < input.length(); i++ )
{
int value = (int) input.charAt(i);
freq[value]++;
}
return freq;
}
public static void main(String[] args)
{
String testString = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt "
+ "ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco "
+ "laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in "
+ "voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat "
+ "non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.";
// get frequency array
int[] frequencies = getFrequencies(testString);
// get original length
int orig = testString.length();
System.out.println("Length of original string: " + orig);
System.out.println("\nFrequency array:");
System.out.println(Arrays.toString(frequencies));
// create huffman tree from frequencies
HuffmanTree ht = new HuffmanTree(frequencies);
ht.displayCodes();
// get BitStream of tree
BitStream bs = ht.getTree().encodedTree();
System.out.println("Huffman Tree BitStream:");
System.out.println(bs);
int treelen = bs.getBank().length;
System.out.println("Length of tree stream: " + treelen);
// encode string to bitstream
BitStream os = ht.encode(testString);
//BitStream os = ht.encode(testString.getBytes());
System.out.println("Data BitStream:");
System.out.println(os);
int datalen = os.getBank().length;
System.out.println("length of data stream: " + datalen);
System.out.println("Compressed length: " + (datalen + treelen));
System.out.println("Compressed %:" + (((float)(orig - treelen - datalen)/orig) * 100));
// re-create huffman tree from encoded bitstream
HuffmanTree dt = new HuffmanTree(bs);
dt.displayCodes();
// decode back to string
String output = dt.decode(os);
System.out.println(output);
}
}
Length of original string: 445
Frequency array:
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 68, 0, 0, 0, 0, 0, 0, 0
, 0, 0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0
, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 29, 3, 16, 18, 37, 3, 3, 1, 42, 0, 0, 21, 17, 24, 29, 11, 5, 22, 18
, 32, 28, 3, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
32 : 110
44 : 0101111
46 : 000000
68 : 000011010
69 : 000011011
76 : 00001000
85 : 00001001
97 : 1000
98 : 0101100
99 : 10110
100 : 11101
101 : 1111
102 : 0101101
103 : 0101110
104 : 00001100
105 : 001
108 : 0001
109 : 10111
110 : 0110
111 : 1001
112 : 01010
113 : 000001
114 : 0100
115 : 11100
116 : 1010
117 : 0111
118 : 0000111
120 : 0000101
Huffman Tree BitStream:
2 5d 71 14 ca ad e0 b4 28 94 5b b5 b2 d2 5c 97 2 c5 66 59 e5 8b 75 d4
58 6d eb a2 c7 6d 48 b 9d 92 ca 1
Length of tree stream: 36
Data BitStream:
8 94 fb e2 ae 3d f7 64 65 37 d 68 bf d2 fd 69 6e 7d ab e9 d3 47 4a 8f
2c 59 76 f1 34 bf 73 fb bb 3b cb f2 f3 dd 5f 75 4a 62 d6 3d 3d 76 ac f5
86 16 4a 7e fa dd 91 94 fd 78 5c d1 a0 48 2f 0 60 9a de c6 fa 3b ae 58
df 7 f6 31 75 f8 17 3c cd 3c a4 7e ef b e9 63 51 46 5b 38 8c 5e d3 86
16 4a 1e 66 3c 39 eb 40 90 5c ab 78 5d f1 ad 37 bc f6 75 a5 b9 e0 bc 50
18 34 e7 9a 1e bf 14 74 fd d9 19 4c 5b 27 a9 3c 33 db be 86 b1 6c 1e 45
d5 51 5f 83 f8 9a df ce 7e b1 11 7b ee c8 ca 7e f7 cb 5d 71 8a cc e2 31
95 8 62 9d 0 c1 b0 b6 f5 57 ba 6e 16 ad 36 b4 7d a2 b5 9d 47 b1 51 59
a5 b2 92 4f 7d a9 7e e3 b5 62 da ce 2a 8c b 9d 2b 56 9b 18 dd fe 7a 3b
56 bc 88 9a d0 c6 f8 f7 7f 2b c 2c 94 7b 80 1
length of data stream: 232
Compressed length: 268
Compressed %:39.775284
32 : 110
44 : 0101111
46 : 000000
68 : 000011010
69 : 000011011
76 : 00001000
85 : 00001001
97 : 1000
98 : 0101100
99 : 10110
100 : 11101
101 : 1111
102 : 0101101
103 : 0101110
104 : 00001100
105 : 001
108 : 0001
109 : 10111
110 : 0110
111 : 1001
112 : 01010
113 : 000001
114 : 0100
115 : 11100
116 : 1010
117 : 0111
118 : 0000111
120 : 0000101
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliq
ua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aut
e irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat c
upidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment