Skip to content

Instantly share code, notes, and snippets.

@ahmetb ahmetb/gist:1520093
Created Dec 26, 2011

Embed
What would you like to do?
simple edge n-gram prefix filter
package service.search;
import java.io.Reader;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
import org.apache.lucene.util.Version;
/**
 * Analyzer that tokenizes on whitespace, lower-cases each token and then
 * expands it into front-anchored edge n-grams (prefixes).
 *
 * <p>The analysis chain is:
 * {@link WhitespaceTokenizer} -&gt; {@link LowerCaseFilter} -&gt;
 * {@link EdgeNGramTokenFilter} ({@link Side#FRONT}, gram sizes
 * {@value #MIN_GRAM} to {@value #MAX_GRAM}).
 */
public final class LowerCaseNGramPrefixAnalyzer extends ReusableAnalyzerBase {

    /** Smallest prefix length passed to the edge n-gram filter. */
    private static final int MIN_GRAM = 3;

    /** Largest prefix length passed to the edge n-gram filter. */
    private static final int MAX_GRAM = 15;

    /** Lucene compatibility version applied to every version-aware component. */
    private final Version matchVersion;

    /**
     * Creates the analyzer.
     *
     * @param matchVersion Lucene compatibility version handed to the
     *                     tokenizer and the lower-case filter
     */
    public LowerCaseNGramPrefixAnalyzer(Version matchVersion) {
        this.matchVersion = matchVersion;
    }

    /**
     * Builds the tokenizer/filter chain for a field.
     *
     * @param fieldName name of the field being analyzed (not used; the same
     *                  chain is built for every field)
     * @param reader    source of the text to analyze
     * @return components whose source is the whitespace tokenizer and whose
     *         final stream is the edge n-gram filter
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
            Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
        // Use the configured matchVersion here as well (previously hard-coded
        // to LUCENE_35) so the whole chain runs under one compatibility version.
        TokenStream lowerCased = new LowerCaseFilter(matchVersion, source);
        TokenStream prefixes = new EdgeNGramTokenFilter(
                lowerCased, Side.FRONT, MIN_GRAM, MAX_GRAM);
        return new TokenStreamComponents(source, prefixes);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.