Skip to content

Instantly share code, notes, and snippets.

@JustSlavic
Created December 19, 2019 09:09
Show Gist options
  • Save JustSlavic/283030aac2a04184e07f028fc9cb26eb to your computer and use it in GitHub Desktop.
Save JustSlavic/283030aac2a04184e07f028fc9cb26eb to your computer and use it in GitHub Desktop.
Getting frequencies of words (ASCII)
#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
/// Frequency table of words.
///
/// Entries are stored in a vector (first-seen order until sort() is called),
/// and a hash index maps each word to its slot so that push() does not have
/// to scan the whole vector for every word.
class dictionary {
public:
    /// A word paired with the number of times it has been pushed.
    using value_t = std::pair<std::string, size_t>;

    /// Ordering used by sort(): higher counts first; equal counts are
    /// ordered by word, ascending.
    std::function<bool(const value_t&, const value_t&)> cmp = [](const value_t& lhs, const value_t& rhs) -> bool {
        if (lhs.second == rhs.second) {
            return lhs.first < rhs.first;
        }
        return lhs.second > rhs.second;
    };

    /// Records one occurrence of `value`: appends a new entry with count 1
    /// the first time a word is seen, otherwise increments its counter.
    void push(const std::string& value) {
        const auto found = index.find(value);
        if (found == index.end()) {
            container.emplace_back(value, 1);
            index.emplace(value, container.size() - 1);
        } else {
            ++container[found->second].second;
        }
    }

    /// Sorts the entries with `cmp`.
    void sort() {
        std::sort(container.begin(), container.end(), cmp);
        // Sorting moves entries around, so refresh every word's slot to keep
        // push() correct if it is called again afterwards.
        for (std::size_t slot = 0; slot < container.size(); ++slot) {
            index[container[slot].first] = slot;
        }
    }

    /// Returns a copy of the entries in their current order.
    std::vector<value_t> to_vector() const {
        return container;
    }

private:
    std::vector<value_t> container;                      ///< entries, in insertion or sorted order
    std::unordered_map<std::string, std::size_t> index;  ///< word -> position in `container`
};
/// Splits `input` into words and invokes `callback` once per word.
///
/// A word is a maximal run of characters for which std::isalpha is true;
/// each character is lower-cased with std::tolower before being stored.
/// Every other character acts as a separator. The callback receives a
/// mutable reference to an internal buffer that is cleared after the call.
///
/// @param input     stream to read from; read until get() stops yielding characters.
/// @param callback  function called with each non-empty word, in input order.
void apply_to_words(std::istream& input, const std::function<void(std::string&)>& callback) {
    using traits = std::istream::traits_type;

    std::string buffer;
    // Keep get()'s int result so the end-of-input sentinel is not mistaken for
    // a character, and stop on *any* read failure rather than only on eofbit
    // (a stream in a bad state would otherwise loop forever).
    for (auto next = input.get(); !traits::eq_int_type(next, traits::eof()); next = input.get()) {
        // <cctype> functions require a value representable as unsigned char;
        // passing a negative char is undefined behaviour.
        const auto byte = static_cast<unsigned char>(next);
        if (std::isalpha(byte)) {
            buffer.push_back(static_cast<char>(std::tolower(byte)));
        } else if (!buffer.empty()) {
            callback(buffer);
            buffer.clear();
        }
    }
    // Flush a word that runs up to the end of the input.
    if (!buffer.empty()) {
        callback(buffer);
    }
}
std::vector<std::pair<std::string, size_t>> count_words(std::istream& input) {
dictionary dict;
apply_to_words(input, [&dict](std::string& word) { dict.push(word); });
dict.sort();
return dict.to_vector();
}
/// Entry point: `freq <input file> <output file>`.
///
/// Reads the input file, counts its words with count_words, and writes one
/// "<count> <word>" line per entry to the output file, in the order returned
/// by count_words.
/// NOTE(review): the original computed the result but never wrote it, so the
/// line format is a choice made here — confirm against the expected output.
///
/// @return EXIT_SUCCESS on success; EXIT_FAILURE on bad usage or any file error.
int main(int argc, char** argv) {
    if (argc < 3) {
        std::cerr << "usage: freq <input file> <output file>\n";
        return EXIT_FAILURE;
    }

    std::ifstream input(argv[1]);
    if (!input.is_open()) {
        std::cerr << "Cannot open the input file\n";
        return EXIT_FAILURE;
    }

    std::ofstream output(argv[2]);
    if (!output.is_open()) {
        std::cerr << "Cannot open the output file\n";
        return EXIT_FAILURE;
    }

    const auto result = count_words(input);
    for (const auto& entry : result) {
        output << entry.second << ' ' << entry.first << '\n';
    }

    // Flush explicitly so a write failure is reported instead of being lost
    // in the stream's destructor.
    output.flush();
    if (!output) {
        std::cerr << "Cannot write to the output file\n";
        return EXIT_FAILURE;
    }
    return EXIT_SUCCESS;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment