Skip to content

Instantly share code, notes, and snippets.

@taras-sereda
Created February 27, 2017 06:23
Show Gist options
  • Save taras-sereda/d11078a555c93c048b92e6c93d86815a to your computer and use it in GitHub Desktop.
Save taras-sereda/d11078a555c93c048b92e6c93d86815a to your computer and use it in GitHub Desktop.
word frequency tool
// compile with g++ --std=c++11
// g++ --std=c++11 freqs.cpp -o freqs
#include <algorithm>
#include <cctype>
#include <fstream>
#include <iostream>
#include <locale>
#include <map>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
using namespace std;
namespace {

// Maps a lower-cased word to the number of times it has been seen.
using FrequencyMap = std::map<std::string, int>;

// Tallies the words of a single input line into `frequency_map`.
//
// Punctuation characters are deleted first (so "don't" becomes "dont" and
// "a,b" becomes "ab"), the remainder is split on whitespace, and only tokens
// that fully match `word_pattern` are lower-cased and counted.
void count_words_in_line(std::string line, const std::regex &word_pattern,
                         FrequencyMap &frequency_map) {
    // The <cctype> classifiers are undefined for negative arguments, so every
    // character is routed through unsigned char before being classified.
    line.erase(std::remove_if(line.begin(), line.end(),
                              [](unsigned char ch) { return std::ispunct(ch) != 0; }),
               line.end());

    std::istringstream tokens(line);
    std::string word;
    // Extracting inside the loop condition avoids processing a stale/empty
    // token after the stream hits end-of-line.
    while (tokens >> word) {
        if (!std::regex_match(word, word_pattern)) {
            continue;
        }
        std::transform(word.begin(), word.end(), word.begin(),
                       [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
        // operator[] value-initialises a missing entry to 0, so one lookup
        // covers both the "new word" and the "seen before" case.
        ++frequency_map[word];
    }
}

} // namespace

// Word frequency tool.
//
// Usage: freqs <input file> <output file>
//
// Reads the input file line by line, counts alphabetic words
// case-insensitively, and writes one "<count> <word>" line per distinct word
// to the output file, ordered by descending count and then alphabetically.
//
// Returns 0 on success and 1 if the arguments are missing or either file
// cannot be opened or written.
int main(int argc, char *argv[]) {
    if (argc < 3) {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "freqs")
                  << " <input file> <output file>" << '\n';
        return 1;
    }

    // Adopt the user's locale so character classification follows the
    // environment. std::locale("") throws if the environment names a locale
    // the system does not provide; in that case keep the default "C" locale.
    try {
        std::locale::global(std::locale(""));
    } catch (const std::runtime_error &) {
        std::cerr << "Warning: unsupported environment locale, using the default" << '\n';
    }

    // Built after the global locale is set so the regex picks it up.
    const std::regex word_pattern("[[:alpha:]]+");

    std::ifstream input_file(argv[1]);
    if (!input_file.is_open()) {
        std::cerr << "Unable to open input file" << '\n';
        return 1;
    }

    FrequencyMap frequency_map;
    std::string line;
    while (std::getline(input_file, line)) {
        count_words_in_line(line, word_pattern, frequency_map);
    }
    input_file.close();

    // Copy the (word, count) entries out of the map so they can be reordered.
    std::vector<std::pair<std::string, int>> frequency_list(frequency_map.begin(),
                                                            frequency_map.end());
    std::sort(frequency_list.begin(), frequency_list.end(),
              [](const std::pair<std::string, int> &lhs,
                 const std::pair<std::string, int> &rhs) {
                  if (lhs.second != rhs.second) {
                      return lhs.second > rhs.second; // most frequent first
                  }
                  return lhs.first < rhs.first; // ties in alphabetical order
              });

    // The output file is opened only now, so an unreadable input never
    // truncates an existing output file.
    std::ofstream out_file(argv[2]);
    if (!out_file.is_open()) {
        std::cerr << "Unable to open out file" << '\n';
        return 1;
    }
    for (const auto &entry : frequency_list) {
        out_file << entry.second << ' ' << entry.first << '\n';
    }
    out_file.close();
    if (!out_file) {
        std::cerr << "Unable to write out file" << '\n';
        return 1;
    }
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment