Created
March 19, 2018 17:31
-
-
Save christofur/fb80a4f50917512ab1d7944b7be6fb6e to your computer and use it in GitHub Desktop.
Simple Lucene.Net analyzer, demonstrating tokenizer and filter usage
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.IO; | |
using Lucene.Net.Analysis; | |
using Lucene.Net.Analysis.Standard; | |
using Version = Lucene.Net.Util.Version; | |
/// <summary>
/// Simple analyzer demonstrating how a tokenizer and several token filters
/// are chained together: standard tokenization, standard filtering,
/// lower-casing, English stop-word removal and Porter stemming.
/// </summary>
public class ChainedFiltersAnalyzer : Analyzer
{
    // Lucene compatibility version handed to the tokenizer.
    private const Version MatchVersion = Version.LUCENE_30;

    /// <summary>
    /// Builds the token stream used to analyze the text of a field.
    /// </summary>
    /// <param name="fieldName">Name of the field being analyzed (not used by this analyzer).</param>
    /// <param name="reader">Reader supplying the text to tokenize.</param>
    /// <returns>The tail of the filter chain, a <see cref="PorterStemFilter"/>.</returns>
    public override TokenStream TokenStream(string fieldName, TextReader reader)
    {
        // 1: Tokenize
        TokenStream stream = new StandardTokenizer(MatchVersion, reader);

        // 2: Filter
        stream = new StandardFilter(stream);

        // Lower-case BEFORE removing stop words. ENGLISH_STOP_WORDS_SET is a
        // case-sensitive CharArraySet of lower-case words, and StopFilter uses a
        // CharArraySet as-is (its ignoreCase argument is disregarded in that case),
        // so running the stop filter first would let "The", "And", ... through.
        stream = new LowerCaseFilter(stream);
        stream = new StopFilter(true, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET, true);

        // PorterStemFilter expects lower-cased input, so it stays last.
        return new PorterStemFilter(stream);
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment