Created
August 5, 2016 17:58
-
-
Save tareq-si-salem/d504dec1e5b85a787678fb3dd280fbdd to your computer and use it in GitHub Desktop.
File compression using Huffman Code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.DataInputStream; | |
import java.io.DataOutputStream; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileOutputStream; | |
import java.io.PrintWriter; | |
public class HuffmanCodeGenerator { | |
// a class to hold all operation that are related | |
// to generating a huffman code | |
private void huffman() { | |
int[] twoMins = new int[2]; | |
findTwoMins(twoMins); | |
create(twoMins[0], twoMins[1]); | |
} | |
// find the 2 minimums contains in an array | |
// such that smallest[0] <= smallest[1] <= smallest[i] | |
// for all i < forest size | |
private void findTwoMins(int[] smallest) { | |
if (f.cells[0].weight < f.cells[1].weight) { | |
smallest[0] = 0; | |
smallest[1] = 1; | |
} else { | |
smallest[0] = 1; | |
smallest[1] = 0; | |
} | |
for (int i = 2; i <= lastTree; i++) { | |
if (f.cells[smallest[0]].weight > f.cells[i].weight) { | |
smallest[1] = smallest[0]; | |
smallest[0] = i; | |
} else if (f.cells[smallest[1]].weight > f.cells[i].weight) { | |
smallest[1] = i; | |
} | |
} | |
} | |
// create a new tree from two sub trees from forest | |
// such that the two are the smallest in that forest | |
// the smallest is in left | |
// and the other is added to the right | |
// update tree value | |
// the two subtrees parents is equal to lastNode | |
// reduce the forest size by one since the two subtrees | |
// are merged to one | |
private void create(int min0, int min1) { | |
t.nodes[f.cells[min0].holder].p = t.nodes[f.cells[min1].holder].p = lastNode; | |
t.nodes[lastNode].l = f.cells[min0].holder; | |
t.nodes[lastNode].r = f.cells[min1].holder; | |
f.cells[min1].weight += f.cells[min0].weight; | |
f.cells[min1].holder = lastNode; | |
f.cells[min0] = f.cells[lastTree]; | |
if (lastNode != 2 * (n - 1)) { | |
t.nodes[lastNode].p = lastNode + 1; | |
} | |
lastNode++; | |
} | |
// obtain the code 1's and 0's as an array of characters | |
// of the corresponding character c | |
char[] getCode(char c) { | |
for (int i = 0; i < characters.length; i++) { | |
if (characters[i] == c) { | |
return codes[i]; | |
} | |
} | |
return null; | |
} | |
public void startHuffman(char[] characters, int[] weights) { | |
this.characters = characters; | |
this.weights = weights; | |
n = characters.length; | |
lastNode = n; | |
lastTree = n - 1; | |
codes = new char[n][]; | |
f = new Forest(weights); | |
t = new BinaryTree(n * 2 - 1); | |
// while the forest isn't empty | |
while (lastTree >= 1) { | |
huffman(); | |
lastTree--; | |
} | |
code(); | |
System.out.println("Tree value"); | |
System.out.println("Index\tleft\tright\tparent"); | |
for (int i = 0; i < 2 * n - 1; i++) { | |
System.out.println(i + "\t" + t.nodes[i].l + "\t" + t.nodes[i].r + "\t" + t.nodes[i].p); | |
} | |
} | |
private void code() { | |
for (int i = 0; i < n; i++) { | |
char codeFlipped[] = new char[n - 1]; | |
int j = i, k = 0; | |
while (t.nodes[j].p != -1) { | |
if (t.nodes[t.nodes[j].p].l == j) { | |
codeFlipped[k] = '0'; | |
} else { | |
codeFlipped[k] = '1'; | |
} | |
j = t.nodes[j].p; | |
k++; | |
} | |
codes[i] = new char[k]; | |
j = 0; | |
while (k > 0) { | |
codes[i][j] = codeFlipped[k - 1]; | |
k--; | |
j++; | |
} | |
System.out.println("Code of character " + characters[i] + " of weight " + weights[i] + " is :[" + String.valueOf(codes[i]) + "]" | |
); | |
} | |
} | |
// a class defining a BinaryTree data structure | |
class BinaryTree { | |
// the tree defines a Node structure | |
class Node { | |
public int l = -1, r = -1, p = -1; | |
// a binary tree node can have a parent and left child and | |
// a right child | |
} | |
// create an array of nodes to hold nodes values | |
public Node[] nodes; | |
//initialize the binary tree | |
public BinaryTree(int size) { | |
nodes = new Node[size]; | |
for (int i = 0; i < size; i++) { | |
nodes[i] = new Node(); | |
} | |
} | |
} | |
// a class defining a forest a list of trees | |
class Forest { | |
// a cell corresponds to a tree | |
// we only need to point to the root (holder) | |
// weight is the number of repitions of | |
// all the leafs(characters) in a given sequence | |
class Cell { | |
int holder; | |
int weight; | |
public Cell(int holder, int weight) { | |
this.holder = holder; | |
this.weight = weight; | |
} | |
} | |
// a list of trees | |
public Cell[] cells; | |
// initializing the forest | |
public Forest(int weights[]) { | |
cells = new Cell[weights.length]; | |
for (int i = 0; i < weights.length; i++) { | |
cells[i] = new Cell(i, weights[i]); | |
} | |
} | |
} | |
// hold distinct characters values | |
char characters[]; | |
// hold their weights (number of repitions) | |
int weights[]; | |
// their coresponding codes | |
char[][] codes; | |
// binary tree variable | |
BinaryTree t; | |
// forest variable | |
Forest f; | |
// n: number if distint characters | |
// lastTree is the current last element of the forrest | |
// lastNode is the next root of the huffman tree | |
int n, lastTree, lastNode; | |
// initialize the generator | |
public HuffmanCodeGenerator() { | |
} | |
// print the result of the huffman coding | |
// and compare the original text | |
// and the guffman encoder result | |
public void printResult(char charContent[]) { | |
System.out.println("Original text with binary value: "); | |
for (int i = 0; i < charContent.length; i++) { | |
if (i % 5 == 0 && i != 0) { | |
System.out.println(""); | |
} | |
String binaryValue = Integer.toBinaryString(charContent[i]); | |
while (binaryValue.length() < 8) { | |
binaryValue = "0" + binaryValue; | |
} | |
System.out.print(binaryValue); | |
} | |
System.out.println(""); | |
System.out.println("[Original " + (charContent.length) + " bytes ], [ " + charContent.length * 8 + " bits ]"); | |
// 1 byte to represent a character: normal ASCII encoding | |
System.out.println("-----------------------"); | |
System.out.println("Using huffman coding: "); | |
int bitCount = 0; | |
for (int i = 0; i < charContent.length; i++) { | |
char[] code = getCode(charContent[i]); | |
for (int j = 0; j < code.length; j++) { | |
if (bitCount % (40) == 0 && bitCount != 0) { | |
System.out.println(""); | |
} | |
System.out.print(code[j]); | |
bitCount++; | |
} | |
} | |
System.out.println(""); | |
System.out.println("Can be reduced to [ " + (bitCount / 8) + " bytes ], [ " + bitCount + " bits ] " + (int) ((bitCount) / (8.0 * charContent.length) * 100) + " %"); | |
} | |
// Read a text file and compress it to a file with .compress extention | |
public void compressFile(char[] charContent, String fileName) { | |
try { | |
File file = new File(fileName); | |
DataOutputStream stream = new DataOutputStream(new FileOutputStream(file)); | |
// save meta data | |
// save original text content | |
stream.writeInt(charContent.length); | |
//save number of distinct characters | |
stream.writeInt(characters.length); | |
for (int i = 0; i < characters.length; i++) { | |
// save character value | |
stream.writeChar(characters[i]); | |
// save its weight | |
stream.writeInt(weights[i]); | |
} | |
// end saving meta data | |
int bitCount = 0; | |
// count the current bit in byte to save | |
byte b = 0; | |
// byte to save "Binary I/O" | |
System.out.println("saving binary sequence: "); | |
// loop through each character | |
for (int i = 0; i < charContent.length; i++) { | |
// obtain its code as an array of character example: {'1', '0','1'} | |
// a character of code can take a value of 1 or 0 | |
char code[] = getCode(charContent[i]); | |
for (int j = 0; j < code.length; j++) { | |
if (code[j] == '1') { | |
b |= 0x01; | |
// if code [i] is 1 | |
// set first bit to 1 | |
} | |
if (bitCount == 7) { | |
// byte is full | |
bitCount = 0; | |
// reset bits counter | |
String binaryValue = Integer.toBinaryString(Byte.toUnsignedInt(b)); | |
// obtain the binary value of the unsigned character(byte) | |
while (binaryValue.length() < 8) { | |
binaryValue = "0" + binaryValue; | |
// add prefixes of zeros | |
// we know a byte should have 8 bits | |
} | |
System.out.print(binaryValue); | |
stream.write(b); | |
// write byte to the stream | |
b = 0; | |
} else { | |
b <<= 1; | |
// shift to left by 1 | |
bitCount++; | |
// increment bitcount | |
} | |
} | |
} | |
// in huffman code the sum of all codes could be not a multiple of | |
// 8, so we append zeros to the last byte from the right | |
if (bitCount != 0) { | |
b <<= 7 - bitCount; | |
System.out.println(bitCount); | |
stream.write(b); | |
} | |
String binaryValue = Integer.toBinaryString(Byte.toUnsignedInt(b)); | |
while (binaryValue.length() < 8) { | |
binaryValue = "0" + binaryValue; | |
} | |
System.out.print(binaryValue); | |
System.out.println(""); | |
stream.flush(); | |
// flush to stream | |
} catch (Exception e) { | |
System.out.println(e); | |
} | |
} | |
// read a compressed file and uncompress it to original | |
public void uncompressFile(String fileName) { | |
StringBuilder builder = new StringBuilder(); | |
// string builder to save the huffman code | |
int contentLength = 0; | |
try { | |
File file = new File(fileName); | |
DataInputStream stream = new DataInputStream(new FileInputStream(file)); | |
// retreive meta data | |
contentLength = stream.readInt(); | |
int length = stream.readInt(); | |
char[] chars = new char[length]; | |
int[] weights = new int[length]; | |
for (int i = 0; i < length; i++) { | |
chars[i] = stream.readChar(); | |
weights[i] = stream.readInt(); | |
} | |
// finished retreiving data | |
// find huffman code from meta data | |
startHuffman(chars, weights); | |
// at this stage we have the huffman code | |
while (true) { | |
byte b = stream.readByte(); | |
// read byte | |
int v = Byte.toUnsignedInt(b); | |
// its int unsiged value | |
String binaryValue = Integer.toBinaryString(v); | |
while (binaryValue.length() < 8) { | |
binaryValue = "0" + binaryValue; | |
} | |
builder.append(binaryValue); | |
// obtain the binary codes as stream of characters | |
} | |
} catch (Exception e) { | |
if (builder.length() != 0) { | |
String content = builder.toString(); | |
// string content (huffman code) 01001.... | |
StringBuilder uncompressed = new StringBuilder(); | |
String codes[] = new String[this.codes.length]; | |
for (int i = 0; i < codes.length; i++) { | |
codes[i] = String.valueOf(this.codes[i]); | |
} | |
// codes are converted to strings 010,11 .... | |
int start = 0; | |
while (start < content.length() && contentLength != uncompressed.length()) { | |
// stop loop when start exceeds the content length | |
// or when the content Length == to uncompressed length | |
for (int i = 0; i < codes.length; i++) { | |
// check which huffman code the sequence starts with | |
// with a start offset | |
// we decode the code sequencialy code by code | |
if (content.startsWith(codes[i], start)) { | |
uncompressed.append(characters[i]); | |
// append the correponding character value of the code | |
start += codes[i].length(); | |
// add the code length to start | |
} | |
} | |
} | |
try { | |
PrintWriter writer = new PrintWriter(new FileOutputStream(new File(fileName.replace(".compressed", "")))); | |
// write the uncompressed string to another file | |
// and remove the .compressed extension | |
writer.append(uncompressed.toString()); | |
writer.flush(); | |
// Text I/O | |
} catch (Exception ex) { | |
} | |
} else { | |
System.out.println("builder empty"); | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment