Skip to content

Instantly share code, notes, and snippets.

@karussell
Created January 21, 2011 00:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save karussell/788990 to your computer and use it in GitHub Desktop.
Save karussell/788990 to your computer and use it in GitHub Desktop.
{
"index" : {
"refresh_interval" : "2s",
"analysis" : {
"analyzer" : {
"myanalyzer" : {
"tokenizer" : "standard",
"filter" : ["standard", "lowercase", "stop"]
}
},
"filter" : {
"jetwickfilter": {
"type" : "de.jetwick.es.JetwickFilterFactory"
}
}
}
}
}
package de.jetwick.es;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import de.jetwick.org.apache.solr.analysis.WordDelimiterFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
/**
 * Token filter factory that wraps a token stream in a {@link WordDelimiterFilter}.
 *
 * <p>Every option has a built-in default (the initial field values below) and can be
 * overridden through the per-filter settings passed to the constructor. The setting
 * keys are the field names: {@code generateWordParts}, {@code generateNumberParts},
 * {@code catenateWords}, {@code catenateNumbers}, {@code catenateAll},
 * {@code splitOnCaseChange}, {@code splitOnNumerics}, {@code preserveOriginal},
 * {@code stemEnglishPossessive}, {@code handleAsChar} and {@code handleAsDigit}.
 *
 * <p>The integer options are handed to {@link WordDelimiterFilter} unchanged; their
 * exact meaning is defined by that class (presumably 0 = off, 1 = on, as in Solr's
 * word delimiter filter -- confirm against the bundled copy).
 */
public class JetwickFilterFactory extends AbstractTokenFilterFactory {

    /** Size of the character-type table; only the characters 0..255 are classified. */
    private static final int CHAR_TABLE_SIZE = 256;

    /** Protected-words set passed to the filter; never assigned in this class, so always null. */
    private CharArraySet protectedWords = null;

    int generateWordParts = 0;
    int generateNumberParts = 0;
    int catenateWords = 0;
    int catenateNumbers = 0;
    int catenateAll = 0;
    int splitOnCaseChange = 0;
    int splitOnNumerics = 0;
    int preserveOriginal = 0;
    int stemEnglishPossessive = 0;

    /** Extra characters that are classified like lower-case letters. */
    String handleAsChar = "";

    /** Extra characters that are classified like digits. */
    String handleAsDigit = "@#";

    /**
     * Creates the factory and applies any option overrides found in {@code settings}.
     *
     * @param index         the index this factory belongs to (passed to the super class)
     * @param indexSettings the index-level settings (passed to the super class)
     * @param name          the configured filter name (passed to the super class)
     * @param settings      the per-filter settings; may be null, in which case all
     *                      options keep their defaults
     */
    public JetwickFilterFactory(Index index, Settings indexSettings, String name, Settings settings) {
        super(index, indexSettings, name);
        if (settings != null) {
            // Each lookup falls back to the field's current (default) value, so a
            // configuration that only specifies "type" behaves exactly as before.
            generateWordParts = settings.getAsInt("generateWordParts", generateWordParts);
            generateNumberParts = settings.getAsInt("generateNumberParts", generateNumberParts);
            catenateWords = settings.getAsInt("catenateWords", catenateWords);
            catenateNumbers = settings.getAsInt("catenateNumbers", catenateNumbers);
            catenateAll = settings.getAsInt("catenateAll", catenateAll);
            splitOnCaseChange = settings.getAsInt("splitOnCaseChange", splitOnCaseChange);
            splitOnNumerics = settings.getAsInt("splitOnNumerics", splitOnNumerics);
            preserveOriginal = settings.getAsInt("preserveOriginal", preserveOriginal);
            stemEnglishPossessive = settings.getAsInt("stemEnglishPossessive", stemEnglishPossessive);
            handleAsChar = settings.get("handleAsChar", handleAsChar);
            handleAsDigit = settings.get("handleAsDigit", handleAsDigit);
        }
    }

    /**
     * Wraps {@code tokenStream} in a {@link WordDelimiterFilter} configured from this
     * factory's fields.
     *
     * <p>A fresh 256-entry character-type table is built on every call from the current
     * values of {@code handleAsChar} and {@code handleAsDigit}. Each character gets
     * exactly one type, checked in this order: LOWER (lower-case or listed in
     * {@code handleAsChar}), UPPER, DIGIT (digit or listed in {@code handleAsDigit}),
     * otherwise SUBWORD_DELIM.
     *
     * @param tokenStream the stream to filter
     * @return the filtering stream
     */
    @Override
    public TokenStream create(TokenStream tokenStream) {
        byte[] charTypes = new byte[CHAR_TABLE_SIZE];
        for (int ch = 0; ch < CHAR_TABLE_SIZE; ch++) {
            byte type = 0;
            // indexOf(int) is equivalent to contains(String.valueOf((char) ch)) for
            // these single characters and avoids allocating a String per table entry.
            if (Character.isLowerCase(ch) || handleAsChar.indexOf(ch) >= 0) {
                type |= WordDelimiterFilter.LOWER;
            } else if (Character.isUpperCase(ch)) {
                type |= WordDelimiterFilter.UPPER;
            } else if (Character.isDigit(ch) || handleAsDigit.indexOf(ch) >= 0) {
                type |= WordDelimiterFilter.DIGIT;
            }
            if (type == 0) {
                // Anything not classified above acts as a sub-word delimiter.
                type = WordDelimiterFilter.SUBWORD_DELIM;
            }
            charTypes[ch] = type;
        }
        return new WordDelimiterFilter(tokenStream, charTypes,
                generateWordParts, generateNumberParts,
                catenateWords, catenateNumbers, catenateAll,
                splitOnCaseChange, preserveOriginal,
                splitOnNumerics, stemEnglishPossessive, protectedWords);
    }

    /**
     * Returns the fixed name {@code "jetwickfilter"}, regardless of the name passed to
     * the constructor.
     */
    @Override
    public String name() {
        return "jetwickfilter";
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment