Created
August 5, 2016 17:33
-
-
Save tareq-si-salem/7fa27065a8d6ce4116d689481309ab19 to your computer and use it in GitHub Desktop.
Huffman code algorithm class implementation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public class HuffmanCodeGenerator { | |
// a class to hold all operation that are related | |
// to generating a huffman code | |
private void huffman() { | |
int[] twoMins = new int[2]; | |
findTwoMins(twoMins); | |
create(twoMins[0], twoMins[1]); | |
} | |
// find the 2 minimums contains in an array | |
// such that smallest[0] <= smallest[1] <= smallest[i] | |
// for all i < forest size | |
private void findTwoMins(int[] smallest) { | |
if (f.cells[0].weight < f.cells[1].weight) { | |
smallest[0] = 0; | |
smallest[1] = 1; | |
} else { | |
smallest[0] = 1; | |
smallest[1] = 0; | |
} | |
for (int i = 2; i <= lastTree; i++) { | |
if (f.cells[smallest[0]].weight > f.cells[i].weight) { | |
smallest[1] = smallest[0]; | |
smallest[0] = i; | |
} else if (f.cells[smallest[1]].weight > f.cells[i].weight) { | |
smallest[1] = i; | |
} | |
} | |
} | |
// create a new tree from two sub trees from forest | |
// such that the two are the smallest in that forest | |
// the smallest is in left | |
// and the other is added to the right | |
// update tree value | |
// the two subtrees parents is equal to lastNode | |
// reduce the forest size by one since the two subtrees | |
// are merged to one | |
private void create(int min0, int min1) { | |
t.nodes[f.cells[min0].holder].p = t.nodes[f.cells[min1].holder].p = lastNode; | |
t.nodes[lastNode].l = f.cells[min0].holder; | |
t.nodes[lastNode].r = f.cells[min1].holder; | |
f.cells[min1].weight += f.cells[min0].weight; | |
f.cells[min1].holder = lastNode; | |
f.cells[min0] = f.cells[lastTree]; | |
if (lastNode != 2 * (n - 1)) { | |
t.nodes[lastNode].p = lastNode + 1; | |
} | |
lastNode++; | |
} | |
// obtain the code 1's and 0's as an array of characters | |
// of the corresponding character c | |
char[] getCode(char c) { | |
for (int i = 0; i < characters.length; i++) { | |
if (characters[i] == c) { | |
return codes[i]; | |
} | |
} | |
return null; | |
} | |
// a class defining a BinaryTree data structure | |
class BinaryTree { | |
// the tree defines a Node structure | |
class Node { | |
public int l = -1, r = -1, p = -1; | |
// a binary tree node can have a parent and left child and | |
// a right child | |
} | |
// create an array of nodes to hold nodes values | |
public Node[] nodes; | |
//initialize the binary tree | |
public BinaryTree(int size) { | |
nodes = new Node[size]; | |
for (int i = 0; i < size; i++) { | |
nodes[i] = new Node(); | |
} | |
} | |
} | |
// a class defining a forest a list of trees | |
class Forest { | |
// a cell corresponds to a tree | |
// we only need to point to the root (holder) | |
// weight is the number of repitions of | |
// all the leafs(characters) in a given sequence | |
class Cell { | |
int holder; | |
int weight; | |
public Cell(int holder, int weight) { | |
this.holder = holder; | |
this.weight = weight; | |
} | |
} | |
// a list of trees | |
public Cell[] cells; | |
// initializing the forest | |
public Forest(int weights[]) { | |
cells = new Cell[weights.length]; | |
for (int i = 0; i < weights.length; i++) { | |
cells[i] = new Cell(i, weights[i]); | |
} | |
} | |
} | |
// hold distinct characters values | |
char characters[]; | |
// hold their weights (number of repitions) | |
int weights[]; | |
// their coresponding codes | |
char[][] codes; | |
// binary tree variable | |
BinaryTree t; | |
// forest variable | |
Forest f; | |
// n: number if distint characters | |
// lastTree is the current last element of the forrest | |
// lastNode is the next root of the huffman tree | |
int n, lastTree, lastNode; | |
// initialize the generator | |
public HuffmanCodeGenerator(char[] characters, int[] weights) { | |
this.characters = characters; | |
this.weights = weights; | |
n = characters.length; | |
lastNode = n; | |
lastTree = n - 1; | |
codes = new char[n][]; | |
f = new Forest(weights); | |
t = new BinaryTree(n * 2 - 1); | |
// while the forest isn't empty | |
while (lastTree >= 1) { | |
huffman(); | |
lastTree--; | |
} | |
System.out.println("Tree value"); | |
System.out.println("Index\tleft\tright\tparent"); | |
for (int i = 0; i < 2 * n - 1; i++) { | |
System.out.println(i + "\t" + t.nodes[i].l + "\t" + t.nodes[i].r + "\t" + t.nodes[i].p); | |
} | |
} | |
// print the result of the huffman coding | |
// and compare the original text | |
// and the guffman encoder result | |
public void printResult(char charContent[]) { | |
for (int i = 0; i < n; i++) { | |
char codeFlipped[] = new char[n - 1]; | |
int j = i, k = 0; | |
while (t.nodes[j].p != -1) { | |
if (t.nodes[t.nodes[j].p].l == j) { | |
codeFlipped[k] = '0'; | |
} else { | |
codeFlipped[k] = '1'; | |
} | |
j = t.nodes[j].p; | |
k++; | |
} | |
codes[i] = new char[k]; | |
j = 0; | |
while (k > 0) { | |
codes[i][j] = codeFlipped[k - 1]; | |
k--; | |
j++; | |
} | |
System.out.println("Code of character " + characters[i] + " of weight " + weights[i] + " is :[" + String.valueOf(codes[i]) + "]" | |
); | |
} | |
System.out.println("Original text with binary value: "); | |
for (int i = 0; i < charContent.length; i++) { | |
if (i % 5 == 0 && i != 0) { | |
System.out.println(""); | |
} | |
String binaryValue = Integer.toBinaryString(charContent[i]); | |
while (binaryValue.length() < 8) { | |
binaryValue = "0" + binaryValue; | |
} | |
System.out.print(binaryValue); | |
} | |
System.out.println(""); | |
System.out.println("[Original " + (charContent.length) + " bytes ], [ " + charContent.length * 8 + " bits ]"); | |
// 1 byte to represent a character: normal ASCII encoding | |
System.out.println("-----------------------"); | |
System.out.println("Using huffman coding: "); | |
int bitCount = 0; | |
for (int i = 0; i < charContent.length; i++) { | |
char[] code = getCode(charContent[i]); | |
for (int j = 0; j < code.length; j++) { | |
if (bitCount % (40) == 0 && bitCount != 0) { | |
System.out.println(""); | |
} | |
System.out.print(code[j]); | |
bitCount++; | |
} | |
} | |
System.out.println(""); | |
System.out.println("Can be reduced to [ " + (bitCount / 8) + " bytes ], [ " + bitCount + " bits ] " + (int) ((bitCount) / (8.0 * charContent.length) * 100) + " %"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment