Skip to content

Instantly share code, notes, and snippets.

@jcelerier
Created July 25, 2022 17:58
Show Gist options
  • Save jcelerier/74dfd473bccec8f1bd5d78be5aada569 to your computer and use it in GitHub Desktop.
Save jcelerier/74dfd473bccec8f1bd5d78be5aada569 to your computer and use it in GitHub Desktop.
counting words in c++
#include <algorithm>
#include <boost/container/small_vector.hpp>
#include <fmt/format.h>
#include <robin_hood.h>
static constexpr inline char to_lower(char c) noexcept {
return ((c >= 'A' && c <= 'Z') ? c + ('a' - 'A') : c);
}
static constexpr inline bool is_space(char c) noexcept { return c <= ' '; }
namespace robin_hood {
template <typename T, std::size_t N>
struct hash<boost::container::small_vector<T, N>> {
constexpr size_t operator()(const auto &str) const noexcept {
return robin_hood::hash_bytes(str.data(), sizeof(T) * str.size());
}
};
} // namespace robin_hood
int main() {
using string = boost::container::small_vector<char, 32>;
using map = robin_hood::unordered_flat_map<string, int>;
string word;
robin_hood::unordered_flat_map<string, int> counts;
counts.reserve(1e5);
constexpr int bufsize = 1024 * 1024;
alignas(64) char buf[bufsize];
for (ssize_t n = 0; (n = read(STDIN_FILENO, buf, bufsize)) && n >= 0;) {
for (int i = 0; i < n; i++) {
if (is_space(buf[i])) {
if (!word.empty()) {
counts[word]++;
word.clear();
}
} else {
word.push_back(to_lower(buf[i]));
}
}
}
{
using elem_type = map::value_type;
auto ptr = std::make_unique_for_overwrite<elem_type *[]>(counts.size());
auto it = counts.begin();
auto dit = ptr.get();
while (it != counts.end())
*dit++ = &*it++;
std::sort(ptr.get(), dit, [](const auto &lhs, const auto &rhs) noexcept {
return lhs->second > rhs->second;
});
for (auto it = ptr.get(); it != dit; ++it) {
auto &count = **it;
fmt::print(stdout, "{} {}\n",
std::string_view(count.first.data(), count.first.size()),
count.second);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment