Skip to content

Instantly share code, notes, and snippets.

@christofur
Created March 19, 2018 17:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save christofur/fb80a4f50917512ab1d7944b7be6fb6e to your computer and use it in GitHub Desktop.
Save christofur/fb80a4f50917512ab1d7944b7be6fb6e to your computer and use it in GitHub Desktop.
Simple Lucene.Net analyzer, demonstrating tokenizer and filter usage
using System.IO;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Version = Lucene.Net.Util.Version;
/// <summary>
/// Analyzer that demonstrates chaining a tokenizer with several token filters:
/// standard tokenization, standard filtering, lower-casing, English stop-word
/// removal and Porter stemming, in that order.
/// </summary>
public class ChainedFiltersAnalyzer : Analyzer
{
    // Lucene compatibility version handed to the tokenizer.
    private const Version MatchVersion = Version.LUCENE_30;

    /// <summary>
    /// Builds the analysis chain for the supplied reader.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed; not used by this analyzer.</param>
    /// <param name="reader">Source of the text to tokenize.</param>
    /// <returns>The outermost filter of the chain (the Porter stemmer).</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        // 1: Tokenize
        TokenStream stream = new StandardTokenizer(MatchVersion, reader);

        // 2: Filter
        stream = new StandardFilter(stream);

        // Lower-case BEFORE removing stop words. The built-in English stop set
        // holds lower-case words and, being a prebuilt CharArraySet, is matched
        // with its own case sensitivity (the ignoreCase argument below is not
        // applied to it in Lucene 3.0). With the filters the other way round,
        // capitalised stop words such as "The" slipped through and were indexed.
        stream = new LowerCaseFilter(stream);
        stream = new StopFilter(true, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET, true);

        // The stemmer runs last, on the already lower-cased terms.
        stream = new PorterStemFilter(stream);

        return stream;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment