Skip to content

Instantly share code, notes, and snippets.

@PhDP
Last active December 20, 2015 07:29
Show Gist options
  • Save PhDP/6093357 to your computer and use it in GitHub Desktop.
Save PhDP/6093357 to your computer and use it in GitHub Desktop.
The measure as defined in the Porter algorithm.
/**
* Computes the measure of an English word as defined for the Porter algorithm.
* The definition of the measure can be found here:
* http://snowball.tartarus.org/algorithms/porter/stemmer.html
*
* ...but it's overly complicated. Here's my definition:
*
* The *measure* of a word is the number of vowels followed by a consonant.
*
* Examples:
* Tree = 0    Orc = 1    Obama = 2    Treason = 2
* CCVV        VCC        VCVCV        CCVVCVC
*
* Usage
* -----
* Supply a single argument (an English word) to get its measure. Run with
* no arguments for the tests.
*
* Compilation
* -----------
* clang++ -O3 -std=c++11 measure.cc -o measure
*/
#include <cctype>
#include <iostream>
#include <map>
#include <string>

#include <boost/algorithm/string.hpp>
using namespace std;
/**
 * Tells whether a character is one of the five letters that are always
 * vowels: 'a', 'e', 'i', 'o' or 'u'.
 *
 * Only the lower-case forms match; every other character yields false.
 */
bool main_vowel(char c) {
  switch (c) {
    case 'a':
    case 'e':
    case 'i':
    case 'o':
    case 'u':
      return true;
    default:
      return false;
  }
}
/**
 * Tells whether the letter at position idx of s acts as a vowel.
 *
 * 'a', 'e', 'i', 'o' and 'u' are always vowels. A 'y' is a vowel only when it
 * is not the first letter and the letter before it is not itself a vowel.
 * The comparisons are against lower-case letters only.
 *
 * @param s   The word being inspected.
 * @param idx Position of the letter to classify; it is used to index s
 *            without any range check.
 * @return true when the letter is a vowel, false otherwise.
 */
bool vowel(const string &s, int idx) {
  const char letter = s[idx];
  if (main_vowel(letter)) {
    return true;
  }
  if (idx <= 0 || letter != 'y') {
    return false;
  }
  // A 'y' takes the opposite role of the letter in front of it.
  return !vowel(s, idx - 1);
}
bool consonant(const string &s, int idx) {
return !vowel(s, idx);
}
/**
 * Computes the Porter-algorithm measure of a word: the number of places
 * where a vowel is immediately followed by a consonant.
 *
 * Letters are compared case-insensitively. 'a', 'e', 'i', 'o' and 'u' are
 * always vowels; 'y' is a vowel only when the letter before it is a
 * consonant, so a leading 'y' is a consonant. Every other character counts
 * as a consonant.
 *
 * The word is scanned once, carrying the classification of the previous
 * letter forward, so the cost is linear in the length of the word even for
 * long runs of 'y'.
 *
 * @param s The word to measure. An empty string has measure 0.
 * @return The number of vowel -> consonant transitions in s.
 */
unsigned int measure(std::string s) {
  unsigned int m = 0;
  bool at_start = true;        // a 'y' in first position is never a vowel
  bool prev_is_vowel = false;  // nothing precedes the first letter
  for (const char raw : s) {
    // std::tolower requires a value representable as unsigned char.
    const char c =
        static_cast<char>(std::tolower(static_cast<unsigned char>(raw)));
    bool is_vowel = c == 'a' || c == 'e' || c == 'i' || c == 'o' || c == 'u';
    if (c == 'y') {
      is_vowel = !at_start && !prev_is_vowel;
    }
    if (prev_is_vowel && !is_vowel) {
      ++m;
    }
    prev_is_vowel = is_vowel;
    at_start = false;
  }
  return m;
}
/**
 * Program entry point.
 *
 * With a word as the first command-line argument, prints that word's measure.
 * Without one, runs the built-in self-test: every word of the table is
 * measured, printed next to its result and compared with the expected value.
 *
 * @param argc Number of entries in argv.
 * @param argv Program name followed by the optional word to measure.
 * @return 0 on success, 42 as soon as a self-test word has the wrong measure.
 */
int main(int argc, char **argv) {
  // argv[1] only exists when argc >= 2. Testing for that directly (rather
  // than treating everything except argc == 1 as "a word was given") keeps
  // an argc of 0 from reading past the end of argv.
  if (argc >= 2) {
    const std::string word{argv[1]};
    std::cout << measure(word) << std::endl;
    return 0;
  }

  std::cout << "Running tests...\n";
  // Word -> expected measure. std::map iterates in key order, so the words
  // are reported sorted rather than in the order they are listed here.
  const std::map<std::string, unsigned int> words
      {{"Tr", 0}, {"ee", 0}, {"TREE", 0}, {"y", 0}, {"BY", 0},
       {"trouble", 1}, {"oats", 1}, {"trees", 1}, {"ivy", 1},
       {"Troubles", 2}, {"private", 2}, {"OATEN", 2}, {"orrery", 2}};
  for (const auto &x : words) {
    const unsigned int m = measure(x.first);
    std::cout << x.first << ": " << m;
    if (m != x.second) {
      std::cout << " -> WRONG!! Correct answer: " << x.second << std::endl;
      return 42;
    }
    std::cout << '\n';
  }
  std::cout << "All good!" << std::endl;
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment