Skip to content

Instantly share code, notes, and snippets.

@tareq-si-salem
Created August 5, 2016 17:58
Show Gist options
  • Save tareq-si-salem/d504dec1e5b85a787678fb3dd280fbdd to your computer and use it in GitHub Desktop.
Save tareq-si-salem/d504dec1e5b85a787678fb3dd280fbdd to your computer and use it in GitHub Desktop.
File compression using Huffman Code
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.PrintWriter;
public class HuffmanCodeGenerator {
// a class to hold all operation that are related
// to generating a huffman code
private void huffman() {
int[] twoMins = new int[2];
findTwoMins(twoMins);
create(twoMins[0], twoMins[1]);
}
// find the 2 minimums contains in an array
// such that smallest[0] <= smallest[1] <= smallest[i]
// for all i < forest size
private void findTwoMins(int[] smallest) {
if (f.cells[0].weight < f.cells[1].weight) {
smallest[0] = 0;
smallest[1] = 1;
} else {
smallest[0] = 1;
smallest[1] = 0;
}
for (int i = 2; i <= lastTree; i++) {
if (f.cells[smallest[0]].weight > f.cells[i].weight) {
smallest[1] = smallest[0];
smallest[0] = i;
} else if (f.cells[smallest[1]].weight > f.cells[i].weight) {
smallest[1] = i;
}
}
}
// create a new tree from two sub trees from forest
// such that the two are the smallest in that forest
// the smallest is in left
// and the other is added to the right
// update tree value
// the two subtrees parents is equal to lastNode
// reduce the forest size by one since the two subtrees
// are merged to one
private void create(int min0, int min1) {
t.nodes[f.cells[min0].holder].p = t.nodes[f.cells[min1].holder].p = lastNode;
t.nodes[lastNode].l = f.cells[min0].holder;
t.nodes[lastNode].r = f.cells[min1].holder;
f.cells[min1].weight += f.cells[min0].weight;
f.cells[min1].holder = lastNode;
f.cells[min0] = f.cells[lastTree];
if (lastNode != 2 * (n - 1)) {
t.nodes[lastNode].p = lastNode + 1;
}
lastNode++;
}
// obtain the code 1's and 0's as an array of characters
// of the corresponding character c
char[] getCode(char c) {
for (int i = 0; i < characters.length; i++) {
if (characters[i] == c) {
return codes[i];
}
}
return null;
}
public void startHuffman(char[] characters, int[] weights) {
this.characters = characters;
this.weights = weights;
n = characters.length;
lastNode = n;
lastTree = n - 1;
codes = new char[n][];
f = new Forest(weights);
t = new BinaryTree(n * 2 - 1);
// while the forest isn't empty
while (lastTree >= 1) {
huffman();
lastTree--;
}
code();
System.out.println("Tree value");
System.out.println("Index\tleft\tright\tparent");
for (int i = 0; i < 2 * n - 1; i++) {
System.out.println(i + "\t" + t.nodes[i].l + "\t" + t.nodes[i].r + "\t" + t.nodes[i].p);
}
}
private void code() {
for (int i = 0; i < n; i++) {
char codeFlipped[] = new char[n - 1];
int j = i, k = 0;
while (t.nodes[j].p != -1) {
if (t.nodes[t.nodes[j].p].l == j) {
codeFlipped[k] = '0';
} else {
codeFlipped[k] = '1';
}
j = t.nodes[j].p;
k++;
}
codes[i] = new char[k];
j = 0;
while (k > 0) {
codes[i][j] = codeFlipped[k - 1];
k--;
j++;
}
System.out.println("Code of character " + characters[i] + " of weight " + weights[i] + " is :[" + String.valueOf(codes[i]) + "]"
);
}
}
// a class defining a BinaryTree data structure
class BinaryTree {
// the tree defines a Node structure
class Node {
public int l = -1, r = -1, p = -1;
// a binary tree node can have a parent and left child and
// a right child
}
// create an array of nodes to hold nodes values
public Node[] nodes;
//initialize the binary tree
public BinaryTree(int size) {
nodes = new Node[size];
for (int i = 0; i < size; i++) {
nodes[i] = new Node();
}
}
}
// a class defining a forest a list of trees
class Forest {
// a cell corresponds to a tree
// we only need to point to the root (holder)
// weight is the number of repitions of
// all the leafs(characters) in a given sequence
class Cell {
int holder;
int weight;
public Cell(int holder, int weight) {
this.holder = holder;
this.weight = weight;
}
}
// a list of trees
public Cell[] cells;
// initializing the forest
public Forest(int weights[]) {
cells = new Cell[weights.length];
for (int i = 0; i < weights.length; i++) {
cells[i] = new Cell(i, weights[i]);
}
}
}
// hold distinct characters values
char characters[];
// hold their weights (number of repitions)
int weights[];
// their coresponding codes
char[][] codes;
// binary tree variable
BinaryTree t;
// forest variable
Forest f;
// n: number if distint characters
// lastTree is the current last element of the forrest
// lastNode is the next root of the huffman tree
int n, lastTree, lastNode;
// initialize the generator
public HuffmanCodeGenerator() {
}
// print the result of the huffman coding
// and compare the original text
// and the guffman encoder result
public void printResult(char charContent[]) {
System.out.println("Original text with binary value: ");
for (int i = 0; i < charContent.length; i++) {
if (i % 5 == 0 && i != 0) {
System.out.println("");
}
String binaryValue = Integer.toBinaryString(charContent[i]);
while (binaryValue.length() < 8) {
binaryValue = "0" + binaryValue;
}
System.out.print(binaryValue);
}
System.out.println("");
System.out.println("[Original " + (charContent.length) + " bytes ], [ " + charContent.length * 8 + " bits ]");
// 1 byte to represent a character: normal ASCII encoding
System.out.println("-----------------------");
System.out.println("Using huffman coding: ");
int bitCount = 0;
for (int i = 0; i < charContent.length; i++) {
char[] code = getCode(charContent[i]);
for (int j = 0; j < code.length; j++) {
if (bitCount % (40) == 0 && bitCount != 0) {
System.out.println("");
}
System.out.print(code[j]);
bitCount++;
}
}
System.out.println("");
System.out.println("Can be reduced to [ " + (bitCount / 8) + " bytes ], [ " + bitCount + " bits ] " + (int) ((bitCount) / (8.0 * charContent.length) * 100) + " %");
}
// Read a text file and compress it to a file with .compress extention
public void compressFile(char[] charContent, String fileName) {
try {
File file = new File(fileName);
DataOutputStream stream = new DataOutputStream(new FileOutputStream(file));
// save meta data
// save original text content
stream.writeInt(charContent.length);
//save number of distinct characters
stream.writeInt(characters.length);
for (int i = 0; i < characters.length; i++) {
// save character value
stream.writeChar(characters[i]);
// save its weight
stream.writeInt(weights[i]);
}
// end saving meta data
int bitCount = 0;
// count the current bit in byte to save
byte b = 0;
// byte to save "Binary I/O"
System.out.println("saving binary sequence: ");
// loop through each character
for (int i = 0; i < charContent.length; i++) {
// obtain its code as an array of character example: {'1', '0','1'}
// a character of code can take a value of 1 or 0
char code[] = getCode(charContent[i]);
for (int j = 0; j < code.length; j++) {
if (code[j] == '1') {
b |= 0x01;
// if code [i] is 1
// set first bit to 1
}
if (bitCount == 7) {
// byte is full
bitCount = 0;
// reset bits counter
String binaryValue = Integer.toBinaryString(Byte.toUnsignedInt(b));
// obtain the binary value of the unsigned character(byte)
while (binaryValue.length() < 8) {
binaryValue = "0" + binaryValue;
// add prefixes of zeros
// we know a byte should have 8 bits
}
System.out.print(binaryValue);
stream.write(b);
// write byte to the stream
b = 0;
} else {
b <<= 1;
// shift to left by 1
bitCount++;
// increment bitcount
}
}
}
// in huffman code the sum of all codes could be not a multiple of
// 8, so we append zeros to the last byte from the right
if (bitCount != 0) {
b <<= 7 - bitCount;
System.out.println(bitCount);
stream.write(b);
}
String binaryValue = Integer.toBinaryString(Byte.toUnsignedInt(b));
while (binaryValue.length() < 8) {
binaryValue = "0" + binaryValue;
}
System.out.print(binaryValue);
System.out.println("");
stream.flush();
// flush to stream
} catch (Exception e) {
System.out.println(e);
}
}
// read a compressed file and uncompress it to original
public void uncompressFile(String fileName) {
StringBuilder builder = new StringBuilder();
// string builder to save the huffman code
int contentLength = 0;
try {
File file = new File(fileName);
DataInputStream stream = new DataInputStream(new FileInputStream(file));
// retreive meta data
contentLength = stream.readInt();
int length = stream.readInt();
char[] chars = new char[length];
int[] weights = new int[length];
for (int i = 0; i < length; i++) {
chars[i] = stream.readChar();
weights[i] = stream.readInt();
}
// finished retreiving data
// find huffman code from meta data
startHuffman(chars, weights);
// at this stage we have the huffman code
while (true) {
byte b = stream.readByte();
// read byte
int v = Byte.toUnsignedInt(b);
// its int unsiged value
String binaryValue = Integer.toBinaryString(v);
while (binaryValue.length() < 8) {
binaryValue = "0" + binaryValue;
}
builder.append(binaryValue);
// obtain the binary codes as stream of characters
}
} catch (Exception e) {
if (builder.length() != 0) {
String content = builder.toString();
// string content (huffman code) 01001....
StringBuilder uncompressed = new StringBuilder();
String codes[] = new String[this.codes.length];
for (int i = 0; i < codes.length; i++) {
codes[i] = String.valueOf(this.codes[i]);
}
// codes are converted to strings 010,11 ....
int start = 0;
while (start < content.length() && contentLength != uncompressed.length()) {
// stop loop when start exceeds the content length
// or when the content Length == to uncompressed length
for (int i = 0; i < codes.length; i++) {
// check which huffman code the sequence starts with
// with a start offset
// we decode the code sequencialy code by code
if (content.startsWith(codes[i], start)) {
uncompressed.append(characters[i]);
// append the correponding character value of the code
start += codes[i].length();
// add the code length to start
}
}
}
try {
PrintWriter writer = new PrintWriter(new FileOutputStream(new File(fileName.replace(".compressed", ""))));
// write the uncompressed string to another file
// and remove the .compressed extension
writer.append(uncompressed.toString());
writer.flush();
// Text I/O
} catch (Exception ex) {
}
} else {
System.out.println("builder empty");
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment