Skip to content

Instantly share code, notes, and snippets.

@backsy
Created April 16, 2019 16:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save backsy/a08bf9dcb07c7874ae5839f16d781fb0 to your computer and use it in GitHub Desktop.
Save backsy/a08bf9dcb07c7874ae5839f16d781fb0 to your computer and use it in GitHub Desktop.
#include <errno.h>

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <set>
#include <string>
#include <utility>
#include <vector>
// Maximum number of entries shown in the frequency report.
const unsigned int DISPLAY_N_RESULTS = 10;
// Path of the text file to analyse. Both the characters and the pointer are
// constant, so the name cannot be re-pointed at another file at run time.
const char* const FILENAME = "test_text.txt";
// Ordering functor for pair-like values (anything exposing .first and
// .second): compares by .second, and falls back to .first when the
// .second members are equal.
struct comp
{
    template<typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        return (lhs.second == rhs.second)
            ? (lhs.first < rhs.first)
            : (lhs.second < rhs.second);
    }
};
// Forward declarations of the helpers defined after main().

// Reports why `stream` is in a failed state (message chosen from errno);
// does nothing when the stream has not failed.
void check_file_opened(std::ifstream &stream);
// Adds 1 to the count stored for `word` in `vector`, appending (word, 1)
// when the word is not present yet.
void increment_counter(std::vector<std::pair<std::string, int> > &vector, std::string &word);
// Returns at most `n` entries of `vector` with the largest counts, as a set
// ordered by comp.
std::set<std::pair<std::string, int>, comp> most_common(
std::vector<std::pair<std::string, int> > &vector,
unsigned int n);
int main () {
std::ifstream filestream(FILENAME);
check_file_opened(filestream);
char ch;
int total_combinations = 0;
std::string word_buffer;
std::string temporary_subword;
std::vector<std::pair<std::string, int> > counter;
// loop getting single characters
while (filestream.get(ch))
{
if (isalpha(ch)){
if (isupper(ch)) ch = tolower(ch);
// add valid chars to word_buffer
word_buffer.push_back(ch);
// if more than 3 chars in word_buffer add the combinations to counter
if (word_buffer.length() > 3) {
temporary_subword = word_buffer;
while (temporary_subword.length() > 3)
{
increment_counter(counter, temporary_subword);
total_combinations += 1;
// remove first character from word and try adding that combination too
temporary_subword.erase(temporary_subword.begin());
}
// empty the string for next combination
temporary_subword.clear();
}
// only invalid characters reset the word_buffer
} else {
word_buffer.clear();
}
}
if (filestream.bad())
{
perror("error while reading file");
}
filestream.close();
// print the results
std::set<std::pair<std::string, int>, comp> top = most_common(counter, DISPLAY_N_RESULTS);
std::set<std::pair<std::string, int> >::reverse_iterator it2;
double percent = 0;
std::cout << "Frequency:" << std::endl;
for(it2 = top.rbegin(); it2 != top.rend(); it2++)
{
percent = (it2->second/(double) total_combinations) * 100;
std::cout
<< std::left
<< std::setw(10)
<< it2->first
<< ": "
<< std::right
<< std::fixed
<< std::setprecision(2)
<< std::setw(6)
<< percent << "% "
<< std::left
<< std::setw(50)
<< std::string(percent, '=')
<< std::endl;
}
return 0;
}
// Reports why a stream is in a failed state.
//
// stream: the stream to inspect; it is only queried, never modified.
//
// When stream.fail() is true, a message selected by the current errno value
// is written to standard output (EACCES, ENOENT) or produced through perror
// (any other value). Nothing happens when the stream has not failed.
void check_file_opened(std::ifstream &stream)
{
    // nothing to report for a usable stream
    if (!stream.fail())
    {
        return;
    }

    if (errno == EACCES)
    {
        // this is set if the drive is not ready in DOS
        std::cout << "Drive not ready or permission denied" << std::endl;
    }
    else if (errno == ENOENT)
    {
        std::cout << "Could not find this file" << std::endl;
    }
    else
    {
        perror("opening data file");
    }
}
// Records one more occurrence of a word in the tally.
//
// vector: list of (word, count) entries; updated in place.
// word:   the word to record; it is only read.
//
// The first entry whose word equals `word` has its count increased by 1.
// When no entry matches, (word, 1) is appended at the end.
void increment_counter(
    std::vector<std::pair<std::string, int> > &vector,
    std::string &word)
{
    typedef std::vector<std::pair<std::string, int> >::size_type index_type;

    // walk forward until the word is found or the entries run out
    const index_type entry_count = vector.size();
    index_type pos = 0;
    while (pos < entry_count && vector[pos].first != word)
    {
        ++pos;
    }

    if (pos < entry_count)
    {
        // known word: bump its counter
        vector[pos].second += 1;
    }
    else
    {
        // unseen word: start counting it at 1
        vector.push_back(std::make_pair(word, 1));
    }
}
std::set<std::pair<std::string, int>, comp> most_common(
std::vector<std::pair<std::string, int> > &vector,
unsigned int n)
{
std::set<std::pair<std::string, int>, comp> top_n;
std::vector<std::pair<std::string,int> >::iterator it;
// make sure you dont try to access values outside the vector
if(n > vector.size())
{
n = vector.size();
}
// limiting the scope of i
{
// add first n items to set
unsigned int i = 0;
it = vector.begin();
while(i < n)
{
top_n.insert(std::pair<std::string, int>(it->first, it->second));
i++;
it++;
}
}
for(it = vector.begin() + n; it != vector.end(); it++)
{
// compare with top most value, if greater then add it to the set
// remove the top most value which is the smallest
// this uses reverse order than desired because
// erasing and comparing at the bottom runs into random segfaults
if(it->second > top_n.begin()->second)
{
top_n.insert(std::pair<std::string, int>(it->first, it->second));
top_n.erase(top_n.begin());
}
}
return top_n;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment