Skip to content

Instantly share code, notes, and snippets.

@ahmetb
Created December 26, 2011 00:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ahmetb/1520093 to your computer and use it in GitHub Desktop.
Save ahmetb/1520093 to your computer and use it in GitHub Desktop.
simple edge n-gram prefix filter
package service.search;
import java.io.Reader;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter.Side;
import org.apache.lucene.util.Version;
/**
 * Analyzer that builds a three-stage chain for prefix ("search as you type")
 * matching: a {@link WhitespaceTokenizer}, followed by a
 * {@link LowerCaseFilter}, followed by an {@link EdgeNGramTokenFilter}
 * anchored at the front of each token.
 *
 * <p>Gram lengths range from {@value #MIN_GRAM} to {@value #MAX_GRAM}
 * characters. NOTE(review): in Lucene 3.x, tokens shorter than the minimum
 * gram size are expected to produce no output at all — confirm this is the
 * desired behavior for short search terms.
 */
public final class LowerCaseNGramPrefixAnalyzer extends ReusableAnalyzerBase {

    /** Smallest prefix length emitted by the edge n-gram filter. */
    private static final int MIN_GRAM = 3;

    /** Largest prefix length emitted by the edge n-gram filter. */
    private static final int MAX_GRAM = 15;

    /** Lucene compatibility version applied to every version-aware stage. */
    private final Version matchVersion;

    /**
     * Creates the analyzer.
     *
     * @param matchVersion Lucene compatibility version handed to the
     *                     tokenizer and the lower-case filter
     */
    public LowerCaseNGramPrefixAnalyzer(Version matchVersion) {
        this.matchVersion = matchVersion;
    }

    /**
     * Builds the tokenizer/filter chain for one field.
     *
     * @param fieldName name of the field being analyzed; not used, the same
     *                  chain is built for every field
     * @param reader    source of the text to analyze
     * @return components whose source is the whitespace tokenizer and whose
     *         sink is the front edge n-gram filter
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName,
            Reader reader) {
        Tokenizer tokenizer = new WhitespaceTokenizer(matchVersion, reader);
        // Use the configured version here as well; previously this stage was
        // pinned to LUCENE_35 regardless of what the caller requested, so the
        // tokenizer and the filter could disagree on compatibility behavior.
        TokenStream lowerCased = new LowerCaseFilter(matchVersion, tokenizer);
        TokenStream prefixes = new EdgeNGramTokenFilter(
                lowerCased, Side.FRONT, MIN_GRAM, MAX_GRAM);
        return new TokenStreamComponents(tokenizer, prefixes);
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment