Skip to content

Instantly share code, notes, and snippets.

@ismailmayat
Created November 4, 2019 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ismailmayat/1b42271b883e31962d72091d17f0bae8 to your computer and use it in GitHub Desktop.
Save ismailmayat/1b42271b883e31962d72091d17f0bae8 to your computer and use it in GitHub Desktop.
/// <summary>
/// Runs free-text search input through a Lucene analyzer so the text is
/// normalised before any wildcards are appended to it.
/// </summary>
/// <remarks>
/// <para>
/// The query is analyzed before the wildcards are added. This way words containing
/// diacritics (characters specific to a language) are folded to the ASCII character
/// set, e.g. "weiß Glückwunsch" is flattened into "weiss gluckwunsch". The exact
/// output depends on the analyzer that was supplied.
/// </para>
/// <para>
/// When the wildcards are added before analyzing, the text is not analyzed at all:
/// https://issues.apache.org/jira/browse/LUCENENET-486
/// http://wiki.apache.org/lucene-java/LuceneFAQ#Are_Wildcard.2C_Prefix.2C_and_Fuzzy_queries_case_sensitive.3F
/// </para>
/// </remarks>
public class AsciiFoldingFilter
{
    private readonly Analyzer _analyzer;

    /// <summary>
    /// Creates a filter that uses the indexing analyzer of the given search provider.
    /// </summary>
    /// <param name="baseSearchProvider">
    /// The search provider; it must be a <c>BaseLuceneSearcher</c>, because that is
    /// the type the <c>IndexingAnalyzer</c> is read from.
    /// </param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="baseSearchProvider"/> is null.
    /// </exception>
    /// <exception cref="System.ArgumentException">
    /// <paramref name="baseSearchProvider"/> is not a <c>BaseLuceneSearcher</c>.
    /// </exception>
    public AsciiFoldingFilter(BaseSearchProvider baseSearchProvider)
    {
        if (baseSearchProvider == null)
        {
            throw new System.ArgumentNullException(nameof(baseSearchProvider));
        }

        // A checked conversion gives the caller a descriptive error instead of a
        // bare InvalidCastException when a non-Lucene provider is passed in.
        var luceneSearch = baseSearchProvider as BaseLuceneSearcher;
        if (luceneSearch == null)
        {
            throw new System.ArgumentException(
                "The search provider must be a BaseLuceneSearcher so that its indexing analyzer can be used.",
                nameof(baseSearchProvider));
        }

        _analyzer = luceneSearch.IndexingAnalyzer;
    }

    /// <summary>
    /// Creates a filter that uses the given analyzer directly.
    /// </summary>
    /// <param name="analyzer">The analyzer used to normalise the text.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="analyzer"/> is null.
    /// </exception>
    public AsciiFoldingFilter(Analyzer analyzer)
    {
        if (analyzer == null)
        {
            throw new System.ArgumentNullException(nameof(analyzer));
        }

        _analyzer = analyzer;
    }

    /// <summary>
    /// Parses the trimmed input with a <c>QueryParser</c> built on the configured
    /// analyzer and returns the string form of the resulting query.
    /// </summary>
    /// <param name="stringToFold">The raw text to normalise; may be null or empty.</param>
    /// <returns>
    /// The string representation of the parsed query, or an empty string when
    /// <paramref name="stringToFold"/> is null, empty or whitespace only.
    /// </returns>
    public string FlattenToAscii(string stringToFold)
    {
        // Nothing to analyze: avoid dereferencing null and avoid handing the
        // parser an empty query string.
        if (string.IsNullOrWhiteSpace(stringToFold))
        {
            return string.Empty;
        }

        // NOTE(review): the text is passed to QueryParser.Parse unescaped, so Lucene
        // query-syntax characters in it are treated as syntax and malformed input is
        // presumably rejected by the parser - confirm callers sanitise as needed.
        // The default field is empty because only the analyzed text is of interest.
        var parser = new QueryParser(
            Lucene.Net.Util.Version.LUCENE_29,
            string.Empty,
            _analyzer);

        return parser.Parse(stringToFold.Trim()).ToString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment