Last active
August 29, 2015 14:07
-
-
Save skyzh/8a6fe35a3e46ecbde98a to your computer and use it in GitHub Desktop.
Huffman Tree Encoding (Compressor)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//Drag your file on the excutive file | |
//C++ | |
#include <algorithm> | |
#include <map> | |
#include <functional> | |
#include <vector> | |
#include <list> | |
#include <queue> | |
#include <stack> | |
#include <bitset> | |
#include <iostream> | |
#include <fstream> | |
#include <sstream> | |
#include <cstdlib> | |
#include <ctime> | |
using namespace std; | |
const int MAX_HUFFMANTREE_LEAVES = 10000000; | |
const int KEY_LENGTH_DESCTIPTION = 8; | |
const int KEY_INT_LENGTH = 32; | |
typedef unsigned long KINT; | |
class KeyStore | |
{ | |
public: | |
char ch; | |
KINT key; | |
int GetKeyLength() | |
{ | |
return (key >> (KEY_INT_LENGTH - KEY_LENGTH_DESCTIPTION)); | |
} | |
stack<bool> GetWrite() | |
{ | |
stack<bool> WaitToWrite; | |
KINT tmpKey = key; | |
for (int i = 0; i < GetKeyLength(); i++) | |
{ | |
WaitToWrite.push(tmpKey % 2 == 0 ? false : true); | |
tmpKey = tmpKey >> 1; | |
} | |
return WaitToWrite; | |
} | |
}; | |
class HuffmanTreeNode | |
{ | |
public: | |
HuffmanTreeNode *left, *right, *parent; | |
char ch; | |
bool isLeft; | |
bool hasCharacter; | |
int count; | |
int id; | |
}; | |
class HuffmanTreeData | |
{ | |
private: | |
map<char, int> AppearCount; | |
public: | |
HuffmanTreeData() | |
{ | |
AppearCount.clear(); | |
} | |
void ViewChar(char ch) | |
{ | |
if (AppearCount.find(ch) == AppearCount.end()) | |
{ | |
AppearCount.insert(make_pair(ch, 1)); | |
} | |
else | |
{ | |
AppearCount[ch]++; | |
} | |
} | |
vector<HuffmanTreeNode*> CreateNodeList() | |
{ | |
vector<HuffmanTreeNode*>NodeList; | |
NodeList.clear(); | |
for (map<char, int>::iterator iter = AppearCount.begin(); iter != AppearCount.end(); iter++) | |
{ | |
HuffmanTreeNode *node = new HuffmanTreeNode(); | |
node->left = node->right = node->parent = NULL; | |
node->ch = (*iter).first; | |
node->count = (*iter).second; | |
node->isLeft = false; | |
node->hasCharacter = true; | |
NodeList.push_back(node); | |
} | |
return NodeList; | |
} | |
}; | |
bool NodeCompare(HuffmanTreeNode *a, HuffmanTreeNode *b) | |
{ | |
return a->count < b->count; | |
} | |
class HuffmanTree | |
{ | |
private: | |
HuffmanTreeNode *Leaves[MAX_HUFFMANTREE_LEAVES]; | |
public: | |
HuffmanTree() | |
{ | |
for (int i = 0; i < MAX_HUFFMANTREE_LEAVES; i++) | |
{ | |
Leaves[i] = NULL; | |
} | |
} | |
static HuffmanTree *CreateTree(HuffmanTreeData *TreeData) | |
{ | |
HuffmanTree *Tree = new HuffmanTree(); | |
vector<HuffmanTreeNode*>NodeList = TreeData->CreateNodeList(); | |
while (NodeList.size()>1) | |
{ | |
HuffmanTreeNode *a, *b, *parent; | |
sort(NodeList.begin(), NodeList.end(), NodeCompare); | |
//Processing: Copy | |
{ | |
vector<HuffmanTreeNode*>::iterator iter = NodeList.begin(); | |
a = *iter; | |
b = *(iter + 1); | |
NodeList.erase(iter + 1); NodeList.erase(iter); | |
} | |
parent = new HuffmanTreeNode(); | |
parent->hasCharacter = false; | |
parent->left = a; | |
parent->right = b; | |
parent->parent = NULL; | |
parent->count = a->count + b->count; | |
a->parent = parent; | |
b->parent = parent; | |
a->isLeft = true; | |
b->isLeft = false; | |
NodeList.push_back(parent); | |
} | |
HuffmanTreeNode* Root = NodeList[0]; | |
Tree->Leaves[1] = Root; | |
Tree->Leaves[1]->id = 1; | |
queue<HuffmanTreeNode*> PushQueue; | |
PushQueue.push(Tree->Leaves[1]); | |
HuffmanTreeNode *now; | |
while (!PushQueue.empty()) | |
{ | |
now = PushQueue.front(); | |
PushQueue.pop(); | |
if (now->left) | |
{ | |
now->left->id = now->id * 2; | |
Tree->Leaves[now->left->id] = now->left; | |
PushQueue.push(now->left); | |
} | |
if (now->right) | |
{ | |
now->right->id = now->id * 2 + 1; | |
Tree->Leaves[now->right->id] = now->right; | |
PushQueue.push(now->right); | |
} | |
} | |
return Tree; | |
} | |
map<char, KeyStore> GetKeyTable() | |
{ | |
map<char, KeyStore> Table; | |
Table.clear(); | |
for (int i = 0; i < MAX_HUFFMANTREE_LEAVES; i++) | |
{ | |
if (Leaves[i] != NULL) | |
{ | |
if (Leaves[i]->hasCharacter) | |
{ | |
KeyStore keyStore; | |
keyStore.ch = Leaves[i]->ch; | |
KINT TmpKey = 0; | |
HuffmanTreeNode* Now = Leaves[i]; | |
unsigned int Length = 0; | |
stack<int>keyStack; | |
while (Now->parent) | |
{ | |
keyStack.push(Now->isLeft == true ? 1 : 0); | |
Length++; | |
Now = Now->parent; | |
} | |
while (!keyStack.empty()){ TmpKey = (TmpKey << 1) + keyStack.top(); keyStack.pop(); } | |
if (Length == 0)Length = 1; | |
keyStore.key = (Length << (KEY_INT_LENGTH - KEY_LENGTH_DESCTIPTION)) + TmpKey; | |
Table[keyStore.ch] = keyStore; | |
} | |
} | |
} | |
return Table; | |
} | |
}; | |
void PrintTable(map<char, KeyStore> &Map) | |
{ | |
for (map<char, KeyStore>::iterator iter = Map.begin(); iter != Map.end(); iter++) | |
{ | |
cout << iter->first << " " << iter->second.key << endl; | |
} | |
} | |
void Zip(string filename,string outputFilename) | |
{ | |
ifstream fin; | |
ofstream fout; | |
HuffmanTreeData *Data = new HuffmanTreeData(); | |
cout << "Opening File " << filename << endl; | |
fin.open(filename, ios::in | ios::binary); | |
cout << "Reading..." << endl; | |
unsigned int byteTotal = 0; | |
fin.seekg(0, ios::end); | |
byteTotal = fin.tellg(); | |
fin.seekg(0, ios::beg); | |
long ClockRecord = clock(), lstClockRecord = clock(); | |
unsigned int bytecount = 0; | |
while (fin.good()) | |
{ | |
ClockRecord = clock(); | |
char ch = fin.get(); | |
bytecount += fin.gcount(); | |
if (ClockRecord - lstClockRecord >= CLOCKS_PER_SEC) | |
{ | |
lstClockRecord = ClockRecord; | |
cout << "> Processing: " << ((double)bytecount / byteTotal * 100) << " (" << bytecount << "/" << byteTotal << " Bytes)" << endl; | |
} | |
if (fin.good()) | |
{ | |
Data->ViewChar(ch); | |
} | |
} | |
fin.close(); | |
cout << "Generating Key..." << endl; | |
HuffmanTree *Tree = HuffmanTree::CreateTree(Data); | |
delete Data; | |
map<char, KeyStore> KeyTable = Tree->GetKeyTable(); | |
cout << "Creating Package..." << endl; | |
cout << "Reopening File And Writing..." << endl; | |
fin.open(filename, ios::in | ios::binary); | |
fout.open(outputFilename, ios::out | ios::binary); | |
cout << "Writing Key..." << endl; | |
unsigned int tmpN = KeyTable.size(); | |
fout.write((const char*)&tmpN, sizeof(tmpN)); | |
for (map<char, KeyStore>::iterator iter = KeyTable.begin(); iter != KeyTable.end(); iter++) | |
{ | |
KeyStore tmp = (*iter).second; | |
fout.write((const char*)&tmp, sizeof(KeyStore)); | |
} | |
//PrintTable(KeyTable); | |
cout << "Writing Data..." << endl; | |
queue<bool>WaitToOutput; | |
bytecount = 0; | |
while (fin.good()) | |
{ | |
char ch = fin.get(); | |
bytecount += fin.gcount(); | |
ClockRecord = clock(); | |
if (ClockRecord - lstClockRecord >= CLOCKS_PER_SEC) | |
{ | |
lstClockRecord = ClockRecord; | |
cout << "> Processing: " << ((double)bytecount / byteTotal * 100) << " (" << bytecount << "/" << byteTotal << " Bytes)" << endl; | |
} | |
if (fin.good()) | |
{ | |
stack<bool>TMP = KeyTable[ch].GetWrite(); | |
while (!TMP.empty()) | |
{ | |
WaitToOutput.push(TMP.top()); | |
TMP.pop(); | |
} | |
while (WaitToOutput.size() >= 8) | |
{ | |
unsigned char tmp = (char)(0); | |
for (int i = 0; i < 8; i++) | |
{ | |
tmp = (tmp << 1) + (WaitToOutput.front() == true ? 1 : 0); | |
WaitToOutput.pop(); | |
} | |
fout << (char)tmp; | |
} | |
} | |
} | |
unsigned char LEFT = (unsigned char)WaitToOutput.size(); | |
unsigned char tmp = (char)(0); | |
while (!WaitToOutput.empty()) | |
{ | |
tmp = (tmp << 1) + (WaitToOutput.front() == true ? 1 : 0); | |
WaitToOutput.pop(); | |
} | |
if (LEFT != 0) | |
{ | |
tmp <<= (8 - LEFT); | |
fout << (char)tmp; | |
} | |
fout << (char)LEFT; | |
fin.close(); | |
fout.close(); | |
delete Tree; | |
cout << "Package Created " << outputFilename << endl; | |
return; | |
} | |
void UnZip(string filename, string outputFile) | |
{ | |
ifstream fin; | |
ofstream fout; | |
HuffmanTreeData *Data = new HuffmanTreeData(); | |
cout << "Opening File " << filename << endl; | |
fin.open(filename, ios::in | ios::binary); | |
cout << "Reading..." << endl; | |
cout << "Reading Key Map..." << endl; | |
map<string, char> KeyTable; | |
unsigned int tmpN; | |
unsigned int bytecount=0; | |
unsigned int byteTotal = 0; | |
fin.seekg(0, ios::end); | |
byteTotal = fin.tellg(); | |
fin.seekg(0, ios::beg); | |
fin.read((char*)&tmpN, sizeof(tmpN)); | |
bytecount += fin.gcount(); | |
KeyTable.clear(); | |
for (int i = 0; i < tmpN;i++) | |
{ | |
KeyStore tmp; | |
fin.read((char*)&tmp, sizeof(KeyStore)); | |
bytecount += fin.gcount(); | |
KINT key = tmp.key; | |
string str = bitset<32>(key).to_string(); | |
str = str.substr(str.size() - tmp.GetKeyLength(), tmp.GetKeyLength()); | |
KeyTable[str] = tmp.ch; | |
//cout <<tmp.ch<<" "<< str << endl; | |
} | |
string processKey = ""; | |
cout << "Writing File..." << endl; | |
fout.open(outputFile, ios::out | ios::binary); | |
char ch; | |
long ClockRecord = clock(), lstClockRecord = clock(); | |
while (fin.good()) | |
{ | |
ch = fin.get(); | |
bytecount += fin.gcount(); | |
ClockRecord = clock(); | |
if (ClockRecord - lstClockRecord >= CLOCKS_PER_SEC) | |
{ | |
lstClockRecord = ClockRecord; | |
cout << "> Processing: " << ((double)bytecount / byteTotal * 100) << " (" << bytecount << "/" << byteTotal << " Bytes)" << endl; | |
} | |
if (bytecount >= byteTotal - 1)break; | |
bitset<8>tmp(ch); | |
for (int i = tmp.size() - 1; i >= 0; i--) | |
{ | |
processKey += (tmp[i] == 0 ? "0" : "1"); | |
//cout << processKey << endl; | |
if (KeyTable.find(processKey) != KeyTable.end()) | |
{ | |
fout << KeyTable[processKey]; | |
processKey = ""; | |
} | |
} | |
//cout << tmp; | |
} | |
char chtmp = fin.get(); | |
int needRead = chtmp; | |
bitset<8> tmpBit(ch); | |
for (int i = tmpBit.size() - 1; i >= 8-needRead; i--) | |
{ | |
processKey += (tmpBit[i] == 0 ? "0" : "1"); | |
//cout << processKey << endl; | |
if (KeyTable.find(processKey) != KeyTable.end()) | |
{ | |
fout << KeyTable[processKey]; | |
processKey = ""; | |
} | |
} | |
fin.close(); | |
fout.close(); | |
cout << "Package Unzipped " << outputFile << endl; | |
return; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
//Change Title to 'Compressor' | |
system("title Compressor"); | |
string filename; | |
string outputFilename; | |
if (argc <= 1) | |
{ | |
cout << "Drag the file you want to Package or Unpackage to here:" << endl; | |
cin >> filename; | |
} | |
else | |
{ | |
filename = argv[1]; | |
} | |
//Check if File Type is .LZW | |
bool unzip = false; | |
if (filename.size() >= 4) | |
{ | |
if (filename.substr(filename.size() - 4, 4) == ".LZW") | |
{ | |
unzip = true; | |
} | |
} | |
if (unzip) | |
{ | |
system("title Unpackaging"); | |
outputFilename = filename.substr(0, filename.size() - 4); | |
UnZip(filename, outputFilename); | |
} | |
else | |
{ | |
system("title Packaging"); | |
outputFilename = filename + ".LZW"; | |
Zip(filename, outputFilename); | |
} | |
system("PAUSE"); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment