Skip to content

Instantly share code, notes, and snippets.

@kurojs
Created April 27, 2020 01:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kurojs/ca0f5707505b1ee477a5eed2a969beca to your computer and use it in GitHub Desktop.
Save kurojs/ca0f5707505b1ee477a5eed2a969beca to your computer and use it in GitHub Desktop.
Word Count Engine
#include <iostream>
#include <string>
#include <vector>
#include <unordered_map>
#include <map>
using namespace std;
// Time complexity: O(N) for making occ map, O(logN) for making sorted map, and O(N) for buil the answer. So total is O(2N + logN)
// Space complexity: O(3N) for 2 map and the answer
vector<vector<string>> wordCountEngine( const string& origin_document )
{
// Add trailing space in the end of origin string
string document = origin_document;
document += " ";
// Check if input empty
vector<vector<string>> ans;
int n = document.length();
if (n == 0) return ans;
unordered_map<string, pair<int, int>> occ;
int idx = 0;
// Start from begin of string, if we see a whitespace
// lookback to previous whitespace index
for (int i = 0; i < n; i++) {
if (document[i] == ' ') {
string subStr = "";
// And make a substring from valid character
for (int j = idx; j <= i; j++) {
if (isalpha(document[j])) {
subStr += tolower(document[j]);
}
}
if (subStr.length() == 0) continue;
// Foreach substring, we keep track its first index
// and it occurrences
if (occ.find(subStr) == occ.end()) {
occ[subStr] = {idx, 0};
}
occ[subStr] = {occ[subStr].first, occ[subStr].second + 1};
idx = i + 1;
}
}
// After we have collect all substring data
// we need sorted it in order, so I use a map of map
// with the first key is the substring occurences and the second
// key is the substring first index
map<int, map<int, string>> sorted;
for (auto &it : occ) {
string subStr = it.first;
int _occ = it.second.second;
int _idx = it.second.first;
if (sorted.find(_occ) == sorted.end()) {
sorted[_occ] = map<int, string>();
}
sorted[_occ][_idx] = subStr;
}
// Then i just make answer by travel over this sorted map
for (auto &it : sorted) {
// Note: in this sub map, the index are reversed sorted
for (auto sub_it = it.second.rbegin(); sub_it != it.second.rend(); ++sub_it) {
vector<string> temp;
temp.push_back(sub_it->second);
temp.push_back(to_string(it.first));
ans.insert(ans.begin(), temp);
}
}
return ans;
}
int main() {
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment