Skip to content

Instantly share code, notes, and snippets.

@maixuanhan
Created April 5, 2018 03:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maixuanhan/43948380a8a99503f6b7285770929c56 to your computer and use it in GitHub Desktop.
Save maixuanhan/43948380a8a99503f6b7285770929c56 to your computer and use it in GitHub Desktop.
Word count problem
// mxhan
// Problem 1: word counting
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
inline int hashChar(char c)
{
return c - 'A' - (c > 'Z' ? 32 : 0);
}
inline char toChar(int i)
{
return 'a' + i;
}
inline bool isChar(char c)
{
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z';
}
class TreeNode
{
public:
TreeNode()
{
this->counter = 0;
memset(this->dict, 0x00, sizeof(this->dict));
}
~TreeNode()
{
for (int i = 0; i < 26; ++i)
{
if (this->dict[i] != NULL) delete this->dict[i];
}
}
int counter;
TreeNode* dict[26];
};
class Word
{
public:
Word()
{
// this->word = "";
this->counter = 0;
}
string word;
int counter;
};
class Fsm
{
public:
Fsm()
{
this->head = new TreeNode;
this->current = this->head;
this->wordCount = 0;
this->currentWordIdx = 0;
this->words = NULL;
}
~Fsm()
{
delete this->head;
if (this->words != NULL) delete[] this->words;
}
void Read(char c)
{
if (isChar(c))
{
int idx = hashChar(c);
if (this->current->dict[idx] == NULL)
{
this->current->dict[idx] = new TreeNode;
}
this->current = this->current->dict[idx];
}
else
{
EndRead();
}
}
void EndRead()
{
if (this->current != this->head)
{
this->current->counter++;
if (this->current->counter == 1) // new word
{
this->wordCount++;
}
}
this->current = this->head;
}
void ExtractData()
{
if (this->wordCount <= 0) return;
// Init result list
if (this->words != NULL) delete[] this->words;
this->words = new Word[this->wordCount];
this->currentWordIdx = 0;
this->ExtractNode(this->head, string());
}
void SortData()
{
// TODO: qsort algorithm if the wordCount is big
for (int i = 0; i < this->wordCount - 1; ++i)
{
for (int j = i + 1; j < this->wordCount; ++j)
{
if (this->words[i].counter > this->words[j].counter)
{
// swap 2 words
Word temp = this->words[i];
this->words[i] = this->words[j];
this->words[j] = temp;
}
}
}
}
void PrintData(int n, bool isDescendingMode)
{
if (isDescendingMode)
{
for (int i = 0; i < n && i < this->wordCount; ++i)
{
cout << (i + 1) << ".\t" << this->words[this->wordCount - 1 - i].word << " - "
<< this->words[this->wordCount - 1 - i].counter << endl;
}
}
else
{
for (int i = 0; i < n && i < this->wordCount; ++i)
{
cout << (i + 1) << ".\t" << this->words[i].word << " - " << this->words[i].counter << endl;
}
}
}
private:
void ExtractNode(TreeNode* node, string baseStr)
{
if (node == NULL) return;
if (node->counter > 0)
{
this->words[this->currentWordIdx].word = baseStr;
this->words[this->currentWordIdx].counter = node->counter;
this->currentWordIdx++;
}
for (int i = 0; i < 26; ++i)
{
if (node->dict[i] != NULL)
{
ExtractNode(node->dict[i], baseStr + toChar(i));
}
}
}
private:
TreeNode* head; // head TreeNode's counter is alway zero
TreeNode* current;
Word* words;
int wordCount;
int currentWordIdx;
};
int main(int argc, char* argv[])
{
if (argc != 4)
{
cerr << "Wrong command usage.\n" << endl;
return 1;
}
char* fileName = argv[1];
char* command = argv[2];
int count = atoi(argv[3]);
if (count <= 0)
{
// fair enough, no entry found
return 0;
}
Fsm f;
string line;
ifstream input(fileName);
if (input.is_open())
{
char c = input.get();
while (input.good())
{
f.Read(c);
c = input.get();
}
f.EndRead();
input.close();
}
else
{
cerr << "File not found.\n" << endl;
return 1;
}
f.ExtractData();
f.SortData();
if (hashChar(command[0]) == hashChar('A'))
{
// Ascending mode
f.PrintData(count, false);
}
else if (hashChar(command[0]) == hashChar('D'))
{
// Descending mode
f.PrintData(count, true);
}
else
{
cerr << "Unrecognized query mode.\n" << endl;
return 1;
}
return 0;
}
@maixuanhan
Copy link
Author

maixuanhan commented Apr 5, 2018

Compile and test

g++ problem1.cpp 
./a.out  "novel.txt" "D" 5
./a.out  "novel.txt" "A" 20

novel.txt

India is my Country. All Indians are my brothers and sisters. I love my country. India is part of Asia. My country my land.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment