Skip to content

Instantly share code, notes, and snippets.

@feupeu
Created September 28, 2015 10:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save feupeu/dfe78d8c18bf8eab4663 to your computer and use it in GitHub Desktop.
Save feupeu/dfe78d8c18bf8eab4663 to your computer and use it in GitHub Desktop.
package dk.cs.aau.sw705e15.spoder.external;
/*
author: Fotis Lazarinis (actually I translated from C to Java)
date: June 1997
address: Psilovraxou 12, Agrinio, 30100
comments: Compile it, import the Porter class into you program and create an instance.
Then use the stripAffixes method of this method which takes a String as
input and returns the stem of this String again as a String.
*/
class NewString {
public String str;
NewString() {
str = "";
}
}
public class Porter {
public static String stem(final String input) {
Porter p = new Porter();
return p.stripAffixes(input);
}
@SuppressWarnings("static-access")
private String Clean(String str) {
int last = str.length();
Character ch = new Character(str.charAt(0));
String temp = "";
for (int i = 0; i < last; i++) {
if (ch.isLetterOrDigit(str.charAt(i)))
temp += str.charAt(i);
}
return temp;
} // clean
private boolean hasSuffix(String word, String suffix, NewString stem) {
String tmp = "";
if (word.length() <= suffix.length())
return false;
if (suffix.length() > 1)
if (word.charAt(word.length() - 2) != suffix.charAt(suffix.length() - 2))
return false;
stem.str = "";
for (int i = 0; i < word.length() - suffix.length(); i++)
stem.str += word.charAt(i);
tmp = stem.str;
for (int i = 0; i < suffix.length(); i++)
tmp += suffix.charAt(i);
if (tmp.compareTo(word) == 0)
return true;
else
return false;
}
private boolean vowel(char ch, char prev) {
switch (ch) {
case 'a':
case 'e':
case 'i':
case 'o':
case 'u':
return true;
case 'y': {
switch (prev) {
case 'a':
case 'e':
case 'i':
case 'o':
case 'u':
return false;
default:
return true;
}
}
default:
return false;
}
}
private int measure(String stem) {
int i = 0, count = 0;
int length = stem.length();
while (i < length) {
for (; i < length; i++) {
if (i > 0) {
if (vowel(stem.charAt(i), stem.charAt(i - 1)))
break;
} else {
if (vowel(stem.charAt(i), 'a'))
break;
}
}
for (i++; i < length; i++) {
if (i > 0) {
if (!vowel(stem.charAt(i), stem.charAt(i - 1)))
break;
} else {
if (!vowel(stem.charAt(i), '?'))
break;
}
}
if (i < length) {
count++;
i++;
}
} // while
return (count);
}
private boolean containsVowel(String word) {
for (int i = 0; i < word.length(); i++)
if (i > 0) {
if (vowel(word.charAt(i), word.charAt(i - 1)))
return true;
} else {
if (vowel(word.charAt(0), 'a'))
return true;
}
return false;
}
private boolean cvc(String str) {
int length = str.length();
if (length < 3)
return false;
if ((!vowel(str.charAt(length - 1), str.charAt(length - 2))) && (str.charAt(length - 1) != 'w')
&& (str.charAt(length - 1) != 'x') && (str.charAt(length - 1) != 'y')
&& (vowel(str.charAt(length - 2), str.charAt(length - 3)))) {
if (length == 3) {
if (!vowel(str.charAt(0), '?'))
return true;
else
return false;
} else {
if (!vowel(str.charAt(length - 3), str.charAt(length - 4)))
return true;
else
return false;
}
}
return false;
}
private String step1(String str) {
NewString stem = new NewString();
if (str.charAt(str.length() - 1) == 's') {
if ((hasSuffix(str, "sses", stem)) || (hasSuffix(str, "ies", stem))) {
String tmp = "";
for (int i = 0; i < str.length() - 2; i++)
tmp += str.charAt(i);
str = tmp;
} else {
if ((str.length() == 1) && (str.charAt(str.length() - 1) == 's')) {
str = "";
return str;
}
if (str.charAt(str.length() - 2) != 's') {
String tmp = "";
for (int i = 0; i < str.length() - 1; i++)
tmp += str.charAt(i);
str = tmp;
}
}
}
if (hasSuffix(str, "eed", stem)) {
if (measure(stem.str) > 0) {
String tmp = "";
for (int i = 0; i < str.length() - 1; i++)
tmp += str.charAt(i);
str = tmp;
}
} else {
if ((hasSuffix(str, "ed", stem)) || (hasSuffix(str, "ing", stem))) {
if (containsVowel(stem.str)) {
String tmp = "";
for (int i = 0; i < stem.str.length(); i++)
tmp += str.charAt(i);
str = tmp;
if (str.length() == 1)
return str;
if ((hasSuffix(str, "at", stem)) || (hasSuffix(str, "bl", stem)) || (hasSuffix(str, "iz", stem))) {
str += "e";
} else {
int length = str.length();
if ((str.charAt(length - 1) == str.charAt(length - 2)) && (str.charAt(length - 1) != 'l')
&& (str.charAt(length - 1) != 's') && (str.charAt(length - 1) != 'z')) {
tmp = "";
for (int i = 0; i < str.length() - 1; i++)
tmp += str.charAt(i);
str = tmp;
} else if (measure(str) == 1) {
if (cvc(str))
str += "e";
}
}
}
}
}
if (hasSuffix(str, "y", stem))
if (containsVowel(stem.str)) {
String tmp = "";
for (int i = 0; i < str.length() - 1; i++)
tmp += str.charAt(i);
str = tmp + "i";
}
return str;
}
private String step2(String str) {
String[][] suffixes = { { "ational", "ate" }, { "tional", "tion" }, { "enci", "ence" }, { "anci", "ance" },
{ "izer", "ize" }, { "iser", "ize" }, { "abli", "able" }, { "alli", "al" }, { "entli", "ent" },
{ "eli", "e" }, { "ousli", "ous" }, { "ization", "ize" }, { "isation", "ize" }, { "ation", "ate" },
{ "ator", "ate" }, { "alism", "al" }, { "iveness", "ive" }, { "fulness", "ful" }, { "ousness", "ous" },
{ "aliti", "al" }, { "iviti", "ive" }, { "biliti", "ble" } };
NewString stem = new NewString();
for (int index = 0; index < suffixes.length; index++) {
if (hasSuffix(str, suffixes[index][0], stem)) {
if (measure(stem.str) > 0) {
str = stem.str + suffixes[index][1];
return str;
}
}
}
return str;
}
private String step3(String str) {
String[][] suffixes = { { "icate", "ic" }, { "ative", "" }, { "alize", "al" }, { "alise", "al" },
{ "iciti", "ic" }, { "ical", "ic" }, { "ful", "" }, { "ness", "" } };
NewString stem = new NewString();
for (int index = 0; index < suffixes.length; index++) {
if (hasSuffix(str, suffixes[index][0], stem))
if (measure(stem.str) > 0) {
str = stem.str + suffixes[index][1];
return str;
}
}
return str;
}
private String step4(String str) {
String[] suffixes = { "al", "ance", "ence", "er", "ic", "able", "ible", "ant", "ement", "ment", "ent", "sion",
"tion", "ou", "ism", "ate", "iti", "ous", "ive", "ize", "ise" };
NewString stem = new NewString();
for (int index = 0; index < suffixes.length; index++) {
if (hasSuffix(str, suffixes[index], stem)) {
if (measure(stem.str) > 1) {
str = stem.str;
return str;
}
}
}
return str;
}
private String step5(String str) {
if (str.charAt(str.length() - 1) == 'e') {
if (measure(
str) > 1) {/*
* measure(str)==measure(stem) if ends in vowel
*/
String tmp = "";
for (int i = 0; i < str.length() - 1; i++)
tmp += str.charAt(i);
str = tmp;
} else if (measure(str) == 1) {
String stem = "";
for (int i = 0; i < str.length() - 1; i++)
stem += str.charAt(i);
if (!cvc(stem))
str = stem;
}
}
if (str.length() == 1)
return str;
if ((str.charAt(str.length() - 1) == 'l') && (str.charAt(str.length() - 2) == 'l') && (measure(str) > 1))
if (measure(
str) > 1) {/*
* measure(str)==measure(stem) if ends in vowel
*/
String tmp = "";
for (int i = 0; i < str.length() - 1; i++)
tmp += str.charAt(i);
str = tmp;
}
return str;
}
private String stripPrefixes(String str) {
String[] prefixes = { "kilo", "micro", "milli", "intra", "ultra", "mega", "nano", "pico", "pseudo" };
int last = prefixes.length;
for (int i = 0; i < last; i++) {
if (str.startsWith(prefixes[i])) {
String temp = "";
for (int j = 0; j < str.length() - prefixes[i].length(); j++)
temp += str.charAt(j + prefixes[i].length());
return temp;
}
}
return str;
}
private String stripSuffixes(String str) {
str = step1(str);
if (str.length() >= 1)
str = step2(str);
if (str.length() >= 1)
str = step3(str);
if (str.length() >= 1)
str = step4(str);
if (str.length() >= 1)
str = step5(str);
return str;
}
public String stripAffixes(String str) {
str = str.toLowerCase();
str = Clean(str);
if ((str != "") && (str.length() > 2)) {
str = stripPrefixes(str);
if (str != "")
str = stripSuffixes(str);
}
return str;
} // stripAffixes
} // class
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment