Skip to content

Instantly share code, notes, and snippets.

@vince-geekcore
Last active August 29, 2015 14:28
Show Gist options
  • Save vince-geekcore/c084fa22aad345f06049 to your computer and use it in GitHub Desktop.
Save vince-geekcore/c084fa22aad345f06049 to your computer and use it in GitHub Desktop.
A Lucene.NET hit highlighter (built on a BooleanQuery of wildcard terms) that can be used in combination with Sitecore.
/// <summary>
/// Builds a search-result snippet from <paramref name="pageContent"/> in which every term that
/// matches <paramref name="searchQuery"/> is wrapped in &lt;strong&gt; tags, using the Lucene
/// Contrib Highlighter.
/// </summary>
/// <remarks>
/// Requires the Lucene.Net 3.0.3.0 assemblies (core + contrib Highlighter) from NuGet; it is not
/// supported with the Lucene DLL that ships with Sitecore 7 out of the box.
/// NOTE(review): <c>StringUtil.RemoveTags</c> is not a Lucene type - presumably Sitecore.StringUtil;
/// confirm, or substitute another tag stripper if this must stay Sitecore-free.
/// NOTE(review): apart from tag removal, the text is not HTML-encoded here; confirm that is
/// acceptable before rendering the result as raw HTML (e.g. via <c>new HtmlString(result)</c>).
/// Wrapping the call in try/catch is advised.
/// </remarks>
/// <param name="pageContent">Index field / string holding all content of the page; may contain HTML.</param>
/// <param name="searchQuery">Search term or space-separated keywords entered by the user.</param>
/// <returns>
/// Up to three fragments joined by "...", with matches wrapped in &lt;strong&gt; tags, or
/// <see cref="string.Empty"/> when either input is blank or nothing matched.
/// </returns>
private string GenerateHighlightText(string pageContent, string searchQuery)
{
    // Maximum size of a single fragment and number of fragments in the snippet.
    const int MaxFragmentSize = 132;
    const int MaxFragments = 3;
    const string FragmentSeparator = "...";

    // Nothing to highlight: avoid a NullReferenceException on ToLower() / StringReader(null).
    if (string.IsNullOrWhiteSpace(pageContent) || string.IsNullOrWhiteSpace(searchQuery))
    {
        return string.Empty;
    }

    // Create a prefix-style wildcard query per word and OR them together with a BooleanQuery.
    // Wildcard terms are not run through the analyzer, so they are lower-cased by hand here.
    var booleanQuery = new BooleanQuery();
    var segments = searchQuery.ToLower().Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);
    foreach (var segment in segments)
    {
        // Empty field name: it must match the field name passed to analyzer.TokenStream below.
        var wildcardQuery = new WildcardQuery(new Lucene.Net.Index.Term("", segment + "*"));
        booleanQuery.Add(new BooleanClause(wildcardQuery, Occur.SHOULD));
    }

    IFormatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>");
    var fragmenter = new SimpleFragmenter(MaxFragmentSize);
    var scorer = new QueryScorer(booleanQuery);
    var highlighter = new Highlighter(formatter, scorer) { TextFragmenter = fragmenter };

    // Remove html tags from the content so that markup is neither matched nor cut in half.
    string rawPageContent = StringUtil.RemoveTags(pageContent);
    if (string.IsNullOrWhiteSpace(rawPageContent))
    {
        return string.Empty;
    }

    // The analyzer and the reader are disposable; release them once the fragments are built.
    using (var analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30))
    using (var reader = new StringReader(rawPageContent))
    {
        TokenStream stream = analyzer.TokenStream("", reader);
        string highlightedFragment = highlighter.GetBestFragments(stream, rawPageContent, MaxFragments, FragmentSeparator);

        // The declared return type is string, so return the markup itself (not an HtmlString),
        // and make sure every code path returns a value.
        return string.IsNullOrWhiteSpace(highlightedFragment) ? string.Empty : highlightedFragment;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment