Skip to content

Instantly share code, notes, and snippets.

@skyzh
Last active August 29, 2015 14:07
Show Gist options
  • Save skyzh/8a6fe35a3e46ecbde98a to your computer and use it in GitHub Desktop.
Save skyzh/8a6fe35a3e46ecbde98a to your computer and use it in GitHub Desktop.
Huffman Tree Encoding (Compressor)
//Drag your file on the excutive file
//C++
#include <algorithm>
#include <map>
#include <functional>
#include <vector>
#include <list>
#include <queue>
#include <stack>
#include <bitset>
#include <iostream>
#include <fstream>
#include <sstream>
#include <cstdlib>
#include <ctime>
using namespace std;
const int MAX_HUFFMANTREE_LEAVES = 10000000;
const int KEY_LENGTH_DESCTIPTION = 8;
const int KEY_INT_LENGTH = 32;
typedef unsigned long KINT;
class KeyStore
{
public:
char ch;
KINT key;
int GetKeyLength()
{
return (key >> (KEY_INT_LENGTH - KEY_LENGTH_DESCTIPTION));
}
stack<bool> GetWrite()
{
stack<bool> WaitToWrite;
KINT tmpKey = key;
for (int i = 0; i < GetKeyLength(); i++)
{
WaitToWrite.push(tmpKey % 2 == 0 ? false : true);
tmpKey = tmpKey >> 1;
}
return WaitToWrite;
}
};
class HuffmanTreeNode
{
public:
HuffmanTreeNode *left, *right, *parent;
char ch;
bool isLeft;
bool hasCharacter;
int count;
int id;
};
class HuffmanTreeData
{
private:
map<char, int> AppearCount;
public:
HuffmanTreeData()
{
AppearCount.clear();
}
void ViewChar(char ch)
{
if (AppearCount.find(ch) == AppearCount.end())
{
AppearCount.insert(make_pair(ch, 1));
}
else
{
AppearCount[ch]++;
}
}
vector<HuffmanTreeNode*> CreateNodeList()
{
vector<HuffmanTreeNode*>NodeList;
NodeList.clear();
for (map<char, int>::iterator iter = AppearCount.begin(); iter != AppearCount.end(); iter++)
{
HuffmanTreeNode *node = new HuffmanTreeNode();
node->left = node->right = node->parent = NULL;
node->ch = (*iter).first;
node->count = (*iter).second;
node->isLeft = false;
node->hasCharacter = true;
NodeList.push_back(node);
}
return NodeList;
}
};
bool NodeCompare(HuffmanTreeNode *a, HuffmanTreeNode *b)
{
return a->count < b->count;
}
class HuffmanTree
{
private:
HuffmanTreeNode *Leaves[MAX_HUFFMANTREE_LEAVES];
public:
HuffmanTree()
{
for (int i = 0; i < MAX_HUFFMANTREE_LEAVES; i++)
{
Leaves[i] = NULL;
}
}
static HuffmanTree *CreateTree(HuffmanTreeData *TreeData)
{
HuffmanTree *Tree = new HuffmanTree();
vector<HuffmanTreeNode*>NodeList = TreeData->CreateNodeList();
while (NodeList.size()>1)
{
HuffmanTreeNode *a, *b, *parent;
sort(NodeList.begin(), NodeList.end(), NodeCompare);
//Processing: Copy
{
vector<HuffmanTreeNode*>::iterator iter = NodeList.begin();
a = *iter;
b = *(iter + 1);
NodeList.erase(iter + 1); NodeList.erase(iter);
}
parent = new HuffmanTreeNode();
parent->hasCharacter = false;
parent->left = a;
parent->right = b;
parent->parent = NULL;
parent->count = a->count + b->count;
a->parent = parent;
b->parent = parent;
a->isLeft = true;
b->isLeft = false;
NodeList.push_back(parent);
}
HuffmanTreeNode* Root = NodeList[0];
Tree->Leaves[1] = Root;
Tree->Leaves[1]->id = 1;
queue<HuffmanTreeNode*> PushQueue;
PushQueue.push(Tree->Leaves[1]);
HuffmanTreeNode *now;
while (!PushQueue.empty())
{
now = PushQueue.front();
PushQueue.pop();
if (now->left)
{
now->left->id = now->id * 2;
Tree->Leaves[now->left->id] = now->left;
PushQueue.push(now->left);
}
if (now->right)
{
now->right->id = now->id * 2 + 1;
Tree->Leaves[now->right->id] = now->right;
PushQueue.push(now->right);
}
}
return Tree;
}
map<char, KeyStore> GetKeyTable()
{
map<char, KeyStore> Table;
Table.clear();
for (int i = 0; i < MAX_HUFFMANTREE_LEAVES; i++)
{
if (Leaves[i] != NULL)
{
if (Leaves[i]->hasCharacter)
{
KeyStore keyStore;
keyStore.ch = Leaves[i]->ch;
KINT TmpKey = 0;
HuffmanTreeNode* Now = Leaves[i];
unsigned int Length = 0;
stack<int>keyStack;
while (Now->parent)
{
keyStack.push(Now->isLeft == true ? 1 : 0);
Length++;
Now = Now->parent;
}
while (!keyStack.empty()){ TmpKey = (TmpKey << 1) + keyStack.top(); keyStack.pop(); }
if (Length == 0)Length = 1;
keyStore.key = (Length << (KEY_INT_LENGTH - KEY_LENGTH_DESCTIPTION)) + TmpKey;
Table[keyStore.ch] = keyStore;
}
}
}
return Table;
}
};
void PrintTable(map<char, KeyStore> &Map)
{
for (map<char, KeyStore>::iterator iter = Map.begin(); iter != Map.end(); iter++)
{
cout << iter->first << " " << iter->second.key << endl;
}
}
void Zip(string filename,string outputFilename)
{
ifstream fin;
ofstream fout;
HuffmanTreeData *Data = new HuffmanTreeData();
cout << "Opening File " << filename << endl;
fin.open(filename, ios::in | ios::binary);
cout << "Reading..." << endl;
unsigned int byteTotal = 0;
fin.seekg(0, ios::end);
byteTotal = fin.tellg();
fin.seekg(0, ios::beg);
long ClockRecord = clock(), lstClockRecord = clock();
unsigned int bytecount = 0;
while (fin.good())
{
ClockRecord = clock();
char ch = fin.get();
bytecount += fin.gcount();
if (ClockRecord - lstClockRecord >= CLOCKS_PER_SEC)
{
lstClockRecord = ClockRecord;
cout << "> Processing: " << ((double)bytecount / byteTotal * 100) << " (" << bytecount << "/" << byteTotal << " Bytes)" << endl;
}
if (fin.good())
{
Data->ViewChar(ch);
}
}
fin.close();
cout << "Generating Key..." << endl;
HuffmanTree *Tree = HuffmanTree::CreateTree(Data);
delete Data;
map<char, KeyStore> KeyTable = Tree->GetKeyTable();
cout << "Creating Package..." << endl;
cout << "Reopening File And Writing..." << endl;
fin.open(filename, ios::in | ios::binary);
fout.open(outputFilename, ios::out | ios::binary);
cout << "Writing Key..." << endl;
unsigned int tmpN = KeyTable.size();
fout.write((const char*)&tmpN, sizeof(tmpN));
for (map<char, KeyStore>::iterator iter = KeyTable.begin(); iter != KeyTable.end(); iter++)
{
KeyStore tmp = (*iter).second;
fout.write((const char*)&tmp, sizeof(KeyStore));
}
//PrintTable(KeyTable);
cout << "Writing Data..." << endl;
queue<bool>WaitToOutput;
bytecount = 0;
while (fin.good())
{
char ch = fin.get();
bytecount += fin.gcount();
ClockRecord = clock();
if (ClockRecord - lstClockRecord >= CLOCKS_PER_SEC)
{
lstClockRecord = ClockRecord;
cout << "> Processing: " << ((double)bytecount / byteTotal * 100) << " (" << bytecount << "/" << byteTotal << " Bytes)" << endl;
}
if (fin.good())
{
stack<bool>TMP = KeyTable[ch].GetWrite();
while (!TMP.empty())
{
WaitToOutput.push(TMP.top());
TMP.pop();
}
while (WaitToOutput.size() >= 8)
{
unsigned char tmp = (char)(0);
for (int i = 0; i < 8; i++)
{
tmp = (tmp << 1) + (WaitToOutput.front() == true ? 1 : 0);
WaitToOutput.pop();
}
fout << (char)tmp;
}
}
}
unsigned char LEFT = (unsigned char)WaitToOutput.size();
unsigned char tmp = (char)(0);
while (!WaitToOutput.empty())
{
tmp = (tmp << 1) + (WaitToOutput.front() == true ? 1 : 0);
WaitToOutput.pop();
}
if (LEFT != 0)
{
tmp <<= (8 - LEFT);
fout << (char)tmp;
}
fout << (char)LEFT;
fin.close();
fout.close();
delete Tree;
cout << "Package Created " << outputFilename << endl;
return;
}
void UnZip(string filename, string outputFile)
{
ifstream fin;
ofstream fout;
HuffmanTreeData *Data = new HuffmanTreeData();
cout << "Opening File " << filename << endl;
fin.open(filename, ios::in | ios::binary);
cout << "Reading..." << endl;
cout << "Reading Key Map..." << endl;
map<string, char> KeyTable;
unsigned int tmpN;
unsigned int bytecount=0;
unsigned int byteTotal = 0;
fin.seekg(0, ios::end);
byteTotal = fin.tellg();
fin.seekg(0, ios::beg);
fin.read((char*)&tmpN, sizeof(tmpN));
bytecount += fin.gcount();
KeyTable.clear();
for (int i = 0; i < tmpN;i++)
{
KeyStore tmp;
fin.read((char*)&tmp, sizeof(KeyStore));
bytecount += fin.gcount();
KINT key = tmp.key;
string str = bitset<32>(key).to_string();
str = str.substr(str.size() - tmp.GetKeyLength(), tmp.GetKeyLength());
KeyTable[str] = tmp.ch;
//cout <<tmp.ch<<" "<< str << endl;
}
string processKey = "";
cout << "Writing File..." << endl;
fout.open(outputFile, ios::out | ios::binary);
char ch;
long ClockRecord = clock(), lstClockRecord = clock();
while (fin.good())
{
ch = fin.get();
bytecount += fin.gcount();
ClockRecord = clock();
if (ClockRecord - lstClockRecord >= CLOCKS_PER_SEC)
{
lstClockRecord = ClockRecord;
cout << "> Processing: " << ((double)bytecount / byteTotal * 100) << " (" << bytecount << "/" << byteTotal << " Bytes)" << endl;
}
if (bytecount >= byteTotal - 1)break;
bitset<8>tmp(ch);
for (int i = tmp.size() - 1; i >= 0; i--)
{
processKey += (tmp[i] == 0 ? "0" : "1");
//cout << processKey << endl;
if (KeyTable.find(processKey) != KeyTable.end())
{
fout << KeyTable[processKey];
processKey = "";
}
}
//cout << tmp;
}
char chtmp = fin.get();
int needRead = chtmp;
bitset<8> tmpBit(ch);
for (int i = tmpBit.size() - 1; i >= 8-needRead; i--)
{
processKey += (tmpBit[i] == 0 ? "0" : "1");
//cout << processKey << endl;
if (KeyTable.find(processKey) != KeyTable.end())
{
fout << KeyTable[processKey];
processKey = "";
}
}
fin.close();
fout.close();
cout << "Package Unzipped " << outputFile << endl;
return;
}
int main(int argc, char* argv[])
{
//Change Title to 'Compressor'
system("title Compressor");
string filename;
string outputFilename;
if (argc <= 1)
{
cout << "Drag the file you want to Package or Unpackage to here:" << endl;
cin >> filename;
}
else
{
filename = argv[1];
}
//Check if File Type is .LZW
bool unzip = false;
if (filename.size() >= 4)
{
if (filename.substr(filename.size() - 4, 4) == ".LZW")
{
unzip = true;
}
}
if (unzip)
{
system("title Unpackaging");
outputFilename = filename.substr(0, filename.size() - 4);
UnZip(filename, outputFilename);
}
else
{
system("title Packaging");
outputFilename = filename + ".LZW";
Zip(filename, outputFilename);
}
system("PAUSE");
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment