Sitecore Filtered Item Crawler
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using Sitecore; | |
using Sitecore.Collections; | |
using Sitecore.ContentSearch; | |
using Sitecore.Data; | |
using Sitecore.Data.Items; | |
using Sitecore.Diagnostics; | |
namespace My.Namespace
{
    /// <summary>
    /// A custom item crawler that adds two kinds of filtering on top of the base crawler:
    /// <list type="bullet">
    /// <item><description>
    /// Only items whose template descends from one of the configured "include" templates
    /// are considered for indexing.
    /// </description></item>
    /// <item><description>
    /// Children matching a configured "stop" template, item id or item name are removed
    /// from the child list, so neither they nor their descendants are crawled.
    /// </description></item>
    /// </list>
    /// </summary>
    public class FilteredSitecoreItemCrawler : SitecoreSupportPatches.Search.SitecoreItemCrawler
    {
        // Template ids already found not to match any included template (negative cache,
        // filled lazily by IsExcludedFromIndex).
        // NOTE(review): this is a plain HashSet that is mutated while crawling; if the
        // indexing pipeline calls IsExcludedFromIndex from several threads it needs
        // synchronization - confirm against the indexing configuration in use.
        private readonly ISet<Guid> _excludedTemplates = new HashSet<Guid>();

        // Templates at which crawling stops (filled by AddStopTemplate).
        private readonly ISet<Guid> _stopTemplates = new HashSet<Guid>();

        // Ids of specific items at which crawling stops (filled by AddStopItem).
        private readonly ISet<Guid> _stopItemIds = new HashSet<Guid>();

        // Names of items at which crawling stops; compared case-insensitively (filled by AddStopItem).
        private readonly ISet<string> _stopItemNames = new HashSet<string>(StringComparer.InvariantCultureIgnoreCase);

        // Templates (or base templates) an item must descend from to be indexed (filled by AddIncludedTemplate).
        private readonly ISet<Guid> _includedTemplates = new HashSet<Guid>();

        /// <summary>
        /// Receives one entry of the config section
        /// <c>&lt;include hint="list:AddIncludedTemplate"&gt;</c>, which lists the templates
        /// that should be included in the index.
        /// </summary>
        /// <param name="templateId">The template id; must not be null and must be a valid Sitecore ID.</param>
        [UsedImplicitly]
        public virtual void AddIncludedTemplate(string templateId)
        {
            Assert.ArgumentNotNull(templateId, nameof(templateId));
            Assert.IsTrue(ID.IsID(templateId), $"Configuration: AddIncludedTemplate entry is not a valid GUID. Template ID Value: {templateId}");

            _includedTemplates.Add(ID.Parse(templateId).Guid);
        }

        /// <summary>
        /// Receives one entry of the config section
        /// <c>&lt;stop hint="list:AddStopTemplate"&gt;</c>, which lists the templates
        /// from where no descendants should be processed.
        /// </summary>
        /// <param name="templateId">The template id; must not be null and must be a valid Sitecore ID.</param>
        [UsedImplicitly]
        public virtual void AddStopTemplate(string templateId)
        {
            Assert.ArgumentNotNull(templateId, nameof(templateId));
            // The message names AddStopTemplate so a bad entry is reported against the right config section.
            Assert.IsTrue(ID.IsID(templateId), $"Configuration: AddStopTemplate entry is not a valid GUID. Template ID Value: {templateId}");

            _stopTemplates.Add(ID.Parse(templateId).Guid);
        }

        /// <summary>
        /// Receives one entry of the config section
        /// <c>&lt;stop hint="list:AddStopItem"&gt;</c>, which lists item names or item ids
        /// from where no descendants should be processed.
        /// </summary>
        /// <param name="item">
        /// Either an item id or an item name. Values accepted by <c>ID.IsID</c> are stored as ids;
        /// everything else is stored as a (case-insensitive) item name.
        /// </param>
        [UsedImplicitly]
        public virtual void AddStopItem(string item)
        {
            Assert.ArgumentNotNull(item, nameof(item));

            if (ID.IsID(item))
            {
                _stopItemIds.Add(ID.Parse(item).Guid);
            }
            else
            {
                _stopItemNames.Add(item);
            }
        }

        /// <summary>
        /// Decides whether an item is kept out of the index.
        /// </summary>
        /// <param name="indexable">The item being considered for indexing.</param>
        /// <param name="checkLocation">Passed through unchanged to the base implementation.</param>
        /// <returns>
        /// <c>true</c> when the item's template is a stop template, is already cached as excluded,
        /// or does not descend from any included template; otherwise the result of the base implementation.
        /// </returns>
        protected override bool IsExcludedFromIndex(SitecoreIndexableItem indexable, bool checkLocation = false)
        {
            var item = (Item)indexable;
            // Report the actual parameter name; there is no parameter called "item" on this method.
            Assert.ArgumentNotNull(item, nameof(indexable));

            var templateGuid = item.TemplateID.Guid;

            // NOTE(review): stop templates are matched by exact template id here, while
            // GetChildList matches them via DescendsFrom - confirm the difference is intended.
            if (_excludedTemplates.Contains(templateGuid) || _stopTemplates.Contains(templateGuid))
            {
                return true;
            }

            if (_includedTemplates.Any(includedTemplate => item.DescendsFrom(includedTemplate)))
            {
                return base.IsExcludedFromIndex(indexable, checkLocation);
            }

            // Learn from previous template types to faster ignore items we don't need to index
            _excludedTemplates.Add(templateGuid);
            return true;
        }

        /// <summary>
        /// Extends the base implementation by removing children that match the stop list
        /// (by item id, item name, or template).
        /// </summary>
        /// <param name="parent">The item whose children are requested.</param>
        /// <returns>
        /// The base child list when no stop rules are configured; otherwise a new list,
        /// owned by the same item, without the children that match a stop rule.
        /// </returns>
        protected override ChildList GetChildList(SitecoreIndexableItem parent)
        {
            var unfilteredChildList = base.GetChildList(parent);

            // Nothing to filter: avoid copying the list.
            if (_stopItemIds.Count == 0 && _stopItemNames.Count == 0 && _stopTemplates.Count == 0)
            {
                return unfilteredChildList;
            }

            var itemList = new ItemList();
            foreach (Item item in unfilteredChildList)
            {
                if (_stopItemIds.Contains(item.ID.Guid))
                {
                    continue;
                }

                if (_stopItemNames.Contains(item.Name))
                {
                    continue;
                }

                if (_stopTemplates.Any(st => item.DescendsFrom(st)))
                {
                    continue;
                }

                itemList.Add(item);
            }

            return new ChildList(unfilteredChildList.OwnerItem, itemList);
        }
    }
}
<?xml version="1.0" encoding="utf-8" ?>
<!--
  Example Sitecore config patch that registers FilteredSitecoreItemCrawler as the crawler
  for the index "myIndex". All ids and names below are placeholders.
-->
<configuration xmlns:patch="http://www.sitecore.net/xmlconfig/" xmlns:set="http://www.sitecore.net/xmlconfig/set/"
               xmlns:role="http://www.sitecore.net/xmlconfig/role/" xmlns:env="http://www.sitecore.net/xmlconfig/env/"
               xmlns:search="http://www.sitecore.net/xmlconfig/search/">
  <sitecore search:require="Solr">
    <contentSearch>
      <configuration>
        <indexes>
          <index id="myIndex">
            <locations hint="list:AddCrawler">
              <crawler type="My.Namespace.FilteredSitecoreItemCrawler, My.Assembly">
                <Database>master</Database>
                <Root>/sitecore/content</Root>
                <!-- Templates that should be included in the index (one AddIncludedTemplate entry per child). -->
                <include hint="list:AddIncludedTemplate">
                  <SomeTemplate>{7C4770FB-94E3-4C90-8107-1C731DAA39BE}</SomeTemplate>
                  <SomeOtherTemplate>{031989CF-691F-423E-B8F5-E2C4738F27C5}</SomeOtherTemplate>
                </include>
                <!-- Templates from where no descendants should be processed. -->
                <stop hint="list:AddStopTemplate">
                  <SomeFolder>{C7C15FFD-00F4-49C0-990B-9BCE230AA332}</SomeFolder>
                  <SomeType>{4CDB13A0-780A-4398-B911-CF905403AF52}</SomeType>
                </stop>
                <!-- Specific items (by id or by name) from where no descendants should be processed. -->
                <stop hint="list:AddStopItem">
                  <SomeSpecificItem>{8F25BC6D-8CB6-48B1-B4A6-0BAC0670097C}</SomeSpecificItem>
                  <SomeOtherItem>{662E0B66-C208-4365-811A-BA0AED31D87C}</SomeOtherItem>
                  <SomeItemByName>my-item</SomeItemByName>
                </stop>
              </crawler>
            </locations>
          </index>
        </indexes>
      </configuration>
    </contentSearch>
  </sitecore>
</configuration>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment