Skip to content

Instantly share code, notes, and snippets.

@ivan-ushakov
Created March 2, 2017 19:11
Show Gist options
  • Save ivan-ushakov/d2f36761b523a716afbe2a283ce481b8 to your computer and use it in GitHub Desktop.
Save ivan-ushakov/d2f36761b523a716afbe2a283ce481b8 to your computer and use it in GitHub Desktop.
Calculate frequency of words in text.
// requires C++11, compile with -std=c++11 flag
#include <string>
#include <unordered_map>
#include <vector>
#include <fstream>
#include <iostream>
#include <sstream>
#include <algorithm>
/// Pairs a word with the number of times it has been seen.
///
/// A default-constructed Counter holds an empty word and a count of zero;
/// a Counter built from a word starts with a count of one (the occurrence
/// that caused it to be created).
class Counter
{
    std::string word_;
    // In-class initializer so that no constructor can leave the count
    // indeterminate (the default constructor previously did).
    int value_ = 0;

public:
    /// Creates a Counter with an empty word and a count of zero.
    Counter() = default;

    /// Creates a Counter for @p word with its first occurrence already counted.
    Counter(const std::string &word) : word_(word), value_(1) {}

    /// Records one more occurrence of the word.
    void increase()
    {
        ++value_;
    }

    /// Returns the word this Counter tracks.
    const std::string &word() const
    {
        return word_;
    }

    /// Returns the number of occurrences recorded so far.
    int value() const
    {
        return value_;
    }
};
int main(int argc, char const *argv[])
{
if (argc != 3)
{
std::cout << "usage: freqs in.txt out.txt" << std::endl;
return 0;
}
std::ifstream input(argv[1], std::ios::binary);
if (!input)
{
std::cout << "error: can't read input file" << std::endl;
return -1;
}
std::unordered_map<std::string, Counter> map;
std::stringstream stream;
while (input)
{
char c = input.get();
if (c >= 'a' && c <= 'z')
{
stream << c;
}
else if (c >= 'A' && c <= 'Z')
{
stream << char(c + 32);
}
else
{
std::string word = stream.str();
if (word.size() != 0)
{
auto p = map.find(word);
if (p != map.end())
{
p->second.increase();
}
else
{
map[word] = Counter(word);
}
stream.str(std::string());
}
}
}
std::vector<Counter> v;
for (auto &e : map)
{
v.push_back(e.second);
}
std::sort(v.begin(), v.end(), [](const Counter &a, const Counter &b) {
if (a.value() > b.value()) return true;
if (a.value() < b.value()) return false;
return a.word() < b.word();
});
std::ofstream output(argv[2]);
if (!output)
{
std::cout << "error: can't create output file" << std::endl;
return -1;
}
for (auto &e : v)
{
output << e.value() << " " << e.word() << std::endl;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment