Skip to content

Instantly share code, notes, and snippets.

@mikaelnet
Last active May 10, 2020 07:54
Show Gist options
  • Save mikaelnet/0413e9c8ca7df15f8073d6ec3ed3191a to your computer and use it in GitHub Desktop.
Save mikaelnet/0413e9c8ca7df15f8073d6ec3ed3191a to your computer and use it in GitHub Desktop.
Sitecore Filtered Item Crawler
using System;
using System.Collections.Generic;
using System.Linq;
using Sitecore;
using Sitecore.Collections;
using Sitecore.ContentSearch;
using Sitecore.Data;
using Sitecore.Data.Items;
using Sitecore.Diagnostics;
namespace My.Namespace
{
/// <summary>
/// Represents a custom item crawler that has the following improvements:
/// * Ability to specify what items to index based on templates (or base templates)
/// * Ability to stop crawling items and its descendants based on template, item id or item name
/// </summary>
/// <remarks>
/// The filter lists are populated through the <c>Add*</c> methods below, which are
/// intended to be called from configuration via <c>hint="list:..."</c> elements.
/// If no included template is registered, <see cref="IsExcludedFromIndex"/> excludes every item.
/// </remarks>
public class FilteredSitecoreItemCrawler : SitecoreSupportPatches.Search.SitecoreItemCrawler
{
    // Templates registered through AddIncludedTemplate; only items descending from one of these are indexed.
    private readonly ISet<Guid> _includedTemplates = new HashSet<Guid>();

    // Templates registered through AddStopTemplate; matching children are dropped in GetChildList.
    private readonly ISet<Guid> _stopTemplates = new HashSet<Guid>();

    // Item ids registered through AddStopItem (entries that parse as an ID).
    private readonly ISet<Guid> _stopItemIds = new HashSet<Guid>();

    // Item names registered through AddStopItem (entries that are not an ID); compared case-insensitively.
    private readonly ISet<string> _stopItemNames = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);

    // Cache of concrete template ids already found not to descend from any included template.
    // NOTE(review): HashSet is not thread-safe and this set is mutated from IsExcludedFromIndex;
    // if the indexing provider invokes the crawler from multiple threads, the Add calls need
    // synchronization. Confirm against the crawler's threading model.
    private readonly ISet<Guid> _excludedTemplates = new HashSet<Guid>();

    /// <summary>
    /// Represent the config section &lt;include hint="list:AddIncludedTemplate"&gt;
    /// to list templates that should be included in the index
    /// </summary>
    /// <param name="templateId">Template id in a format accepted by <c>ID.IsID</c>.</param>
    [UsedImplicitly]
    public virtual void AddIncludedTemplate(string templateId)
    {
        Assert.ArgumentNotNull(templateId, nameof(templateId));
        Assert.IsTrue(ID.IsID(templateId), $"Configuration: AddIncludedTemplate entry is not a valid GUID. Template ID Value: {templateId}");
        _includedTemplates.Add(ID.Parse(templateId).Guid);
    }

    /// <summary>
    /// Represents the config section &lt;stop hint="list:AddStopTemplate"&gt;
    /// to list templates from where no descendants should be processed
    /// </summary>
    /// <param name="templateId">Template id in a format accepted by <c>ID.IsID</c>.</param>
    [UsedImplicitly]
    public virtual void AddStopTemplate(string templateId)
    {
        Assert.ArgumentNotNull(templateId, nameof(templateId));
        // The message names this method (not AddIncludedTemplate) so that a bad entry
        // points at the correct configuration section.
        Assert.IsTrue(ID.IsID(templateId), $"Configuration: AddStopTemplate entry is not a valid GUID. Template ID Value: {templateId}");
        _stopTemplates.Add(ID.Parse(templateId).Guid);
    }

    /// <summary>
    /// Represents the config section &lt;stop hint="list:AddStopItem"&gt;
    /// to list item names or item guids where no descendants should be processed
    /// </summary>
    /// <param name="item">
    /// Either an item id (stored in the id stop list) or any other string,
    /// which is treated as an item name.
    /// </param>
    [UsedImplicitly]
    public virtual void AddStopItem(string item)
    {
        Assert.ArgumentNotNull(item, nameof(item));

        if (ID.IsID(item))
        {
            _stopItemIds.Add(ID.Parse(item).Guid);
        }
        else
        {
            _stopItemNames.Add(item);
        }
    }

    /// <summary>
    /// Decides whether an item is left out of the index. An item is excluded when its
    /// template is a stop template or a previously rejected template, or when it does not
    /// descend from any included template. Otherwise the decision is delegated to the base crawler.
    /// </summary>
    /// <param name="indexable">The indexable item to evaluate.</param>
    /// <param name="checkLocation">Passed through unchanged to the base implementation.</param>
    /// <returns><c>true</c> if the item must not be indexed.</returns>
    protected override bool IsExcludedFromIndex(SitecoreIndexableItem indexable, bool checkLocation = false)
    {
        var item = (Item)indexable;
        Assert.ArgumentNotNull(item, "item");

        var templateGuid = item.TemplateID.Guid;

        // Exact template-id match only; base templates are not considered here.
        if (_excludedTemplates.Contains(templateGuid) || _stopTemplates.Contains(templateGuid))
        {
            return true;
        }

        if (_includedTemplates.Any(includedTemplate => item.DescendsFrom(includedTemplate)))
        {
            return base.IsExcludedFromIndex(indexable, checkLocation);
        }

        // Learn from previous template types to faster ignore items we don't need to index
        _excludedTemplates.Add(templateGuid);
        return true;
    }

    /// <summary>
    /// This method extends the built in one, by removing child elements that are
    /// in the stop lists: stop item ids, stop item names and stop templates
    /// (including items that descend from a stop template).
    /// </summary>
    /// <param name="parent">The item whose children are requested.</param>
    /// <returns>
    /// A new child list with the same owner item as the base result,
    /// containing only the children that passed the stop filters.
    /// </returns>
    protected override ChildList GetChildList(SitecoreIndexableItem parent)
    {
        var unfilteredChildList = base.GetChildList(parent);
        var itemList = new ItemList();

        foreach (Item item in unfilteredChildList)
        {
            if (_stopItemIds.Contains(item.ID.Guid))
            {
                continue;
            }

            if (_stopItemNames.Contains(item.Name))
            {
                continue;
            }

            if (_stopTemplates.Any(st => item.DescendsFrom(st)))
            {
                continue;
            }

            itemList.Add(item);
        }

        return new ChildList(unfilteredChildList.OwnerItem, itemList);
    }
}
}
<?xml version="1.0" encoding="utf-8" ?>
<!--
Sample configuration that registers My.Namespace.FilteredSitecoreItemCrawler as a crawler
for the index "myIndex". The element names and GUIDs inside the include/stop lists look
like placeholders and should be replaced with real template and item ids.
-->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/" xmlns:set="http://www.sitecore.net/xmlconfig/set/"
xmlns:role="http://www.sitecore.net/xmlconfig/role/" xmlns:env="http://www.sitecore.net/xmlconfig/env/"
xmlns:search="http://www.sitecore.net/xmlconfig/search/">
<sitecore search:require="Solr">
<contentSearch>
<configuration>
<indexes>
<index id="myIndex">
<locations hint="list:AddCrawler">
<crawler type="My.Namespace.FilteredSitecoreItemCrawler, My.Assembly">
<Database>master</Database>
<Root>/sitecore/content</Root>
<!-- Each entry is handed to AddIncludedTemplate: templates that should be included in the index. Entries must be valid ids. -->
<include hint="list:AddIncludedTemplate">
<SomeTemplate>{7C4770FB-94E3-4C90-8107-1C731DAA39BE}</SomeTemplate>
<SomeOtherTemplate>{031989CF-691F-423E-B8F5-E2C4738F27C5}</SomeOtherTemplate>
</include>
<!-- Each entry is handed to AddStopTemplate: templates from where no descendants should be processed. Entries must be valid ids. -->
<stop hint="list:AddStopTemplate">
<SomeFolder>{C7C15FFD-00F4-49C0-990B-9BCE230AA332}</SomeFolder>
<SomeType>{4CDB13A0-780A-4398-B911-CF905403AF52}</SomeType>
</stop>
<!-- Each entry is handed to AddStopItem: an item id, or otherwise an item name, where no descendants should be processed. -->
<stop hint="list:AddStopItem">
<SomeSpecificItem>{8F25BC6D-8CB6-48B1-B4A6-0BAC0670097C}</SomeSpecificItem>
<SomeOtherItem>{662E0B66-C208-4365-811A-BA0AED31D87C}</SomeOtherItem>
<SomeItemByName>my-item</SomeItemByName>
</stop>
</crawler>
</locations>
</index>
</indexes>
</configuration>
</contentSearch>
</sitecore>
</configuration>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment