Skip to content

Instantly share code, notes, and snippets.

@jafavaro
Last active April 11, 2023 20:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jafavaro/2b80decbe551a71e6977a147df79f0c1 to your computer and use it in GitHub Desktop.
Save jafavaro/2b80decbe551a71e6977a147df79f0c1 to your computer and use it in GitHub Desktop.
Elastic Analyzers with NGram Tokenizer
/// <summary>
/// Populates the supplied analysis descriptor with the tokenizers, token filters,
/// character filters and custom analyzers declared below, and returns it.
/// </summary>
/// <param name="analysis">The descriptor to configure.</param>
/// <returns>The descriptor produced by the fluent configuration chain.</returns>
private static AnalysisDescriptor Analysis(AnalysisDescriptor analysis)
{
    return analysis
        .Tokenizers(tokenizers => tokenizers
            // N-gram tokenizer producing 2..5 character grams from letters and digits only.
            // NOTE(review): min/max differ by 3; newer Elasticsearch versions limit this
            // spread through the index.max_ngram_diff setting - confirm the index settings allow it.
            .NGram("ngram_tokenizer", ngram => ngram
                .MinGram(2)
                .MaxGram(5)
                .TokenChars(new List<TokenChar> { Nest.TokenChar.Letter, Nest.TokenChar.Digit }))
            .Standard("standard"))
        .TokenFilters(tokenFilters => tokenFilters
            // Edge n-gram filter (1..40 characters) used by the "autocomplete" analyzer.
            .EdgeNGram("autocomplete_filter", edge => edge.MinGram(1).MaxGram(40)))
        .CharFilters(charFilters => charFilters
            .HtmlStrip("html_strip")
            // Replaces every run of the listed punctuation characters with an empty string.
            .PatternReplace("pattern_replace", replace => replace
                .Pattern(@"([\%\/\\\&\?\,\'\;\:\!\-\_\(\)\.]+)")
                .Replacement("")))
        .Analyzers(registry => registry
            // Whitespace-tokenized, lowercased, edge n-grammed; HTML and punctuation stripped first.
            .Custom("autocomplete", autocomplete => autocomplete
                .Tokenizer("whitespace")
                .CharFilters("html_strip", "pattern_replace")
                .Filters("lowercase", "autocomplete_filter"))
            // Standard tokenization after stripping HTML.
            .Custom("html_remove", htmlRemove => htmlRemove
                .Tokenizer("standard")
                .CharFilters("html_strip"))
            // Standard tokenization, lowercased; HTML and punctuation stripped first.
            .Custom("full_autocomplete", fullAutocomplete => fullAutocomplete
                .Tokenizer("standard")
                .CharFilters("html_strip", "pattern_replace")
                .Filters("lowercase"))
            // Lowercased n-grams from the custom "ngram_tokenizer".
            .Custom("ngram_analyzer", ngramAnalyzer => ngramAnalyzer
                .Tokenizer("ngram_tokenizer")
                .Filters("lowercase")));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment