Skip to content

Instantly share code, notes, and snippets.

@blacktambourine
Last active November 13, 2017 08:01
Show Gist options
  • Save blacktambourine/1c3f8b43800fdfc43fdc68bcc9293960 to your computer and use it in GitHub Desktop.
Save blacktambourine/1c3f8b43800fdfc43fdc68bcc9293960 to your computer and use it in GitHub Desktop.
Custom item crawler for Lucene that indexes JSON data
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Threading;
using Newtonsoft.Json;
//using CustomModel.Flights.Model;
using Sitecore.Collections;
using Sitecore.ContentSearch;
using Sitecore.ContentSearch.Abstractions;
using Sitecore.ContentSearch.Diagnostics;
using Sitecore.ContentSearch.Pipelines.GetContextIndex;
using Sitecore.Data;
using Sitecore.Data.Items;
using Sitecore.Data.Managers;
using Sitecore.Diagnostics;
using Sitecore.Globalization;
using Sitecore.SecurityModel;
using Version = Sitecore.Data.Version;
namespace Business.Search.Json
{
public class FidsJsonItemCrawler : HierarchicalDataCrawler<SitecoreIndexableItem>, IContextIndexRankable
{
// Name of the Sitecore database this crawler works against; exposed through the Database property.
private string database;
// Identifier of the crawl root as handed to Database.GetItem; defaulted lazily by the Root getter when empty.
private string root;
// Cached root item resolved by GetRootItem; reset to null whenever Root is assigned.
private Item rootItem;
// Non-zero once the "Root item could not be found" error has been written to the logs.
private volatile int rootItemErrorLogged;
/// <summary>
/// Creates a crawler without index operations; Initialize takes them from the index when none are set.
/// </summary>
public FidsJsonItemCrawler()
{
}
/// <summary>
/// Creates a crawler and passes the supplied index operations to the base crawler.
/// </summary>
/// <param name="indexOperations">Index operations forwarded to the base constructor.</param>
public FidsJsonItemCrawler(IIndexOperations indexOperations): base(indexOperations)
{
}
/// <summary>
/// Gets or sets the name of the database this crawler works against.
/// The getter yields <c>null</c> when no name, or an empty name, has been assigned.
/// </summary>
public string Database
{
    get { return string.IsNullOrEmpty(this.database) ? null : this.database; }
    set { this.database = value; }
}
/// <summary>
/// Gets or sets the identifier of the crawl root.
/// </summary>
/// <remarks>
/// When no root has been assigned, the getter resolves the configured database and stores the ID of
/// that database's root item. Assigning a value also discards the cached root item.
/// </remarks>
public string Root
{
    get
    {
        // Fast path: an explicit or previously defaulted root is already known.
        if (!string.IsNullOrEmpty(this.root))
        {
            return this.root;
        }

        IFactory factory = ContentSearchManager.Locator.GetInstance<IFactory>();
        Sitecore.Data.Database db = factory.GetDatabase(this.database);
        Assert.IsNotNull((object)db, "Database " + this.database + " does not exist");

        using (new SecurityDisabler())
        {
            ID rootId = db.GetRootItem().ID;
            this.root = rootId.ToString();
        }

        return this.root;
    }
    set
    {
        // The cached item belongs to the previous root and must be resolved again.
        this.rootItem = null;
        this.root = value;
    }
}
/// <summary>
/// Gets the root item of the crawl, resolving it through <c>GetRootItem</c>.
/// </summary>
/// <exception cref="InvalidOperationException">The root item could not be found.</exception>
public Item RootItem
{
    get
    {
        // GetRootItem caches the resolved item in this.rootItem; a local is returned so the value
        // that was null-checked is the value handed to the caller.
        Item resolved = this.GetRootItem();
        if (resolved == null)
        {
            // Report the actual crawler type rather than the stock SitecoreItemCrawler name.
            throw new InvalidOperationException(string.Format(
                "[Index={0}, Crawler={1}, Database={2}] Root item could not be found: {3}.",
                this.index != null ? (object)this.index.Name : (object)"NULL",
                (object)this.GetType().Name,
                (object)this.database,
                (object)this.root));
        }

        return resolved;
    }
}
/// <summary>
/// Resolves and caches the root item, returning <c>null</c> (instead of throwing) when it cannot be found.
/// </summary>
/// <returns>The cached or freshly loaded root item, or <c>null</c> if the database has no item for Root.</returns>
private Item GetRootItem()
{
    if (this.rootItem != null)
    {
        return this.rootItem;
    }

    Sitecore.Data.Database db = ContentSearchManager.Locator.GetInstance<IFactory>().GetDatabase(this.database);
    Assert.IsNotNull((object)db, "Database " + this.database + " does not exist");

    using (new SecurityDisabler())
    {
        this.rootItem = db.GetItem(this.Root);

        // CompareExchange flips the flag 0 -> 1 atomically, so exactly one caller logs the error;
        // a separate "check then increment" lets concurrent callers all pass the check.
        if (this.rootItem == null && Interlocked.CompareExchange(ref this.rootItemErrorLogged, 1, 0) == 0)
        {
            // Report the actual crawler type rather than the stock SitecoreItemCrawler name.
            string message = string.Format(
                "[Index={0}, Crawler={1}, Database={2}] Root item could not be found: {3}.",
                this.index != null ? (object)this.index.Name : (object)"NULL",
                (object)this.GetType().Name,
                (object)this.database,
                (object)this.root);
            CrawlingLog.Log.Error(message, (Exception)null);
            Log.Error(message, (object)this);
        }
    }

    return this.rootItem;
}
/// <summary>
/// Validates the crawler configuration and initializes the base crawler for the given index.
/// </summary>
/// <param name="index">The search index this crawler is attached to; must not be null.</param>
/// <remarks>
/// When no index operations were supplied through the constructor, the index's own operations are used.
/// </remarks>
public override void Initialize(ISearchIndex index)
{
    Assert.ArgumentNotNull((object)index, nameof(index));
    Assert.IsNotNull((object)this.Database, "Database element not set.");
    Assert.IsNotNull((object)this.Root, "Root element not set.");

    if (this.Operations == null)
    {
        this.Operations = index.Operations;

        // Log the actual crawler type rather than the stock SitecoreItemCrawler name.
        CrawlingLog.Log.Info(
            string.Format(
                "[Index={0}] Initializing {3}. DB:{1} / Root:{2}",
                (object)index.Name,
                (object)this.Database,
                (object)this.Root,
                (object)this.GetType().Name),
            (Exception)null);
    }

    base.Initialize(index);
}
/// <summary>
/// Ranks this crawler for an indexable as the difference between the item's level and the root item's level.
/// </summary>
/// <param name="indexable">The indexable to rank.</param>
/// <returns>
/// <see cref="int.MaxValue"/> when the indexable is not a <c>SitecoreIndexableItem</c> or the root item
/// cannot be resolved; otherwise the item's axis level minus the root item's axis level.
/// </returns>
public virtual int GetContextIndexRanking(IIndexable indexable)
{
    var candidate = indexable as SitecoreIndexableItem;
    if (candidate == null)
    {
        return int.MaxValue;
    }

    if (this.GetRootItem() == null)
    {
        return int.MaxValue;
    }

    Item item = (Item)candidate;
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        int itemLevel = item.Axes.Level;
        int rootLevel = this.RootItem.Axes.Level;
        return itemLevel - rootLevel;
    }
}
/// <summary>
/// Determines whether an indexable is excluded from the index, including the location check.
/// </summary>
/// <param name="indexable">The indexable to test; it is cast to <c>SitecoreIndexableItem</c>.</param>
/// <returns><c>true</c> when the item must not be indexed by this crawler.</returns>
public override bool IsExcludedFromIndex(IIndexable indexable)
    => this.IsExcludedFromIndex((SitecoreIndexableItem)indexable, checkLocation: true);
/// <summary>
/// Determines whether an item is excluded from the index based on its database, its location
/// relative to the crawl root, and the configured template include/exclude lists.
/// </summary>
/// <param name="indexable">The item to test.</param>
/// <param name="checkLocation">When <c>true</c>, the item must also be a descendant of the root item.</param>
/// <returns><c>true</c> when the item must not be indexed; otherwise <c>false</c>.</returns>
/// <remarks>
/// An "indexing:excludedfromindex" event is raised for every exclusion except the one caused by a
/// missing root item. When included templates are configured, excluded templates are ignored.
/// </remarks>
protected override bool IsExcludedFromIndex(SitecoreIndexableItem indexable, bool checkLocation = false)
{
    Item obj = (Item)indexable;
    Assert.ArgumentNotNull(obj, "item");
    IDocumentBuilderOptions documentOptions = this.DocumentOptions;
    Assert.IsNotNull(documentOptions, "DocumentOptions");

    // Items from another database never belong to this crawler.
    if (!obj.Database.Name.Equals(this.Database, StringComparison.InvariantCultureIgnoreCase))
    {
        this.RaiseExcludedFromIndexEvent(obj);
        return true;
    }

    if (checkLocation)
    {
        if (this.GetRootItem() == null)
        {
            return true;
        }

        if (!this.IsAncestorOf(obj))
        {
            this.RaiseExcludedFromIndexEvent(obj);
            return true;
        }
    }

    string templateId = obj.TemplateID.ToString();

    // The include list wins over the exclude list. The original repeated this branch a second
    // time; that copy could never run because this one always returns.
    if (documentOptions.HasIncludedTemplates)
    {
        if (documentOptions.HasExcludedTemplates)
        {
            CrawlingLog.Log.Warn("You have specified both IncludeTemplates and ExcludeTemplates. This logic is not supported. Exclude templates will be ignored.", (Exception)null);
        }

        if (documentOptions.IncludedTemplates.Contains(templateId))
        {
            return false;
        }

        this.RaiseExcludedFromIndexEvent(obj);
        return true;
    }

    if (!documentOptions.ExcludedTemplates.Contains(templateId))
    {
        return false;
    }

    this.RaiseExcludedFromIndexEvent(obj);
    return true;
}

/// <summary>
/// Raises the "indexing:excludedfromindex" event for the given item with the index name and item URI.
/// </summary>
private void RaiseExcludedFromIndexEvent(Item item)
{
    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:excludedfromindex", (object)this.index.Name, (object)item.Uri);
}
/// <summary>
/// Tests whether the crawl root is an ancestor of the given item.
/// </summary>
/// <param name="item">The item whose position in the tree is checked.</param>
/// <returns>The result of the root item's <c>Axes.IsAncestorOf</c> check.</returns>
protected virtual bool IsAncestorOf(Item item)
{
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        return this.RootItem != null && this.RootItem.Axes.IsAncestorOf(item);
    }
}
/// <summary>
/// Excludes unique ids whose database name differs (ignoring case) from this crawler's database.
/// </summary>
/// <param name="indexableUniqueId">The unique id to test; treated as a <c>SitecoreItemUniqueId</c>.</param>
/// <returns><c>true</c> when the id refers to another database.</returns>
protected override bool IsExcludedFromIndex(IIndexableUniqueId indexableUniqueId)
{
    ItemUri uri = (ItemUri)(indexableUniqueId as SitecoreItemUniqueId);
    bool sameDatabase = uri.DatabaseName.Equals(this.Database, StringComparison.InvariantCultureIgnoreCase);
    return !sameDatabase;
}
/// <summary>
/// Adds the airlines stored in an item's "RawJson" field to the index.
/// </summary>
/// <param name="context">The update context of the index being written.</param>
/// <param name="indexable">The Sitecore item that carries the JSON payload.</param>
/// <remarks>
/// For every language of the item, the first version returned for the latest item is read, its
/// "RawJson" field is deserialized into a FidsResponse, and every airline is converted to a
/// JsonIndexableItem and passed to <c>Operations.Add</c>. A language that has no version, no
/// "RawJson" field, or an empty field is skipped without affecting the remaining languages.
/// "indexing:adding" and "indexing:added" events are raised before and after the work.
/// </remarks>
protected override void DoAdd(IProviderUpdateContext context, SitecoreIndexableItem indexable)
{
    Assert.ArgumentNotNull((object)context, nameof(context));
    Assert.ArgumentNotNull((object)indexable, nameof(indexable));
    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:adding", (object)context.Index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);

    foreach (Language language in indexable.Item.Languages)
    {
        Item latest;
        using (new SitecoreCachesDisabler())
        {
            latest = indexable.Item.Database.GetItem(indexable.Item.ID, language, Version.Latest);
        }

        if (latest == null)
        {
            // Name the actual crawler type in the log rather than the stock SitecoreItemCrawler.
            CrawlingLog.Log.Warn(string.Format("{0} : AddItem : Could not build document data {1} - Latest version could not be found. Skipping.", (object)this.GetType().Name, (object)indexable.Item.Uri), (Exception)null);
            continue;
        }

        Item currentVersion;
        using (new SitecoreCachesDisabler())
        {
            currentVersion = latest.Versions.GetVersions(false).FirstOrDefault(); //should only be one version
        }

        // Skip only this language: returning here would abandon every remaining language and
        // suppress the "indexing:added" event.
        if (currentVersion == null)
        {
            continue;
        }

        // A missing field is treated like an empty one instead of throwing.
        string json = currentVersion.Fields["RawJson"]?.Value;
        if (string.IsNullOrEmpty(json))
        {
            continue;
        }

        // Convert the dates: HH = 24-hour clock, mm = minutes (the previous "mm:hh" had them swapped).
        var dateFormat = new Newtonsoft.Json.Converters.IsoDateTimeConverter
        {
            DateTimeFormat = "yyyy-MM-ddTHH:mm:ss"
        };

        // FidsResponse is a custom class to map the JSON to.
        var allFlights = JsonConvert.DeserializeObject<FidsResponse>(json, dateFormat);
        if (allFlights == null || allFlights.Airlines == null)
        {
            continue;
        }

        foreach (var airline in allFlights.Airlines)
        {
            var jsonIndexableItem = (JsonIndexableItem)airline;
            jsonIndexableItem.IndexFieldStorageValueFormatter = context.Index.Configuration.IndexFieldStorageValueFormatter;

            // NOTE(review): the delete targets the Sitecore item, not the airline, and is repeated
            // for every airline — confirm whether it can be hoisted out of this loop.
            this.Operations.Delete((IIndexable)indexable, context); //remove the item to ensure it is updated
            this.Operations.Add((IIndexable)jsonIndexableItem, context, this.index.Configuration); //add or re-add the item
        }
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:added", (object)context.Index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
}
/// <summary>
/// Updates the index documents for the airlines stored in an item's "RawJson" field.
/// </summary>
/// <param name="context">The update context of the index being written.</param>
/// <param name="indexable">The Sitecore item that carries the JSON payload.</param>
/// <remarks>
/// When <c>IndexUpdateNeedDelete</c> reports true the item is deleted from the index and nothing else
/// happens. Otherwise, for every language of the item, the "RawJson" field of the first version
/// returned for the latest item is deserialized into a FidsResponse and every airline is passed to
/// <c>Operations.Update</c>. A language that has no version, no "RawJson" field, or an empty field is
/// skipped without affecting the remaining languages, the "indexing:updateditem" event, or
/// dependency processing.
/// </remarks>
protected override void DoUpdate(IProviderUpdateContext context, SitecoreIndexableItem indexable)
{
    Assert.ArgumentNotNull((object)context, nameof(context));
    Assert.ArgumentNotNull((object)indexable, nameof(indexable));

    if (this.IndexUpdateNeedDelete(indexable))
    {
        this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:deleteitem", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
        this.Operations.Delete((IIndexable)indexable, context);
        return;
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:updatingitem", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);

    foreach (Language language in indexable.Item.Languages)
    {
        Item latest;
        using (new SitecoreCachesDisabler())
        {
            latest = indexable.Item.Database.GetItem(indexable.Item.ID, language, Sitecore.Data.Version.Latest);
        }

        if (latest == null)
        {
            // Name the actual crawler type in the log rather than the stock SitecoreItemCrawler.
            CrawlingLog.Log.Warn(string.Format("{0} : Update : Latest version not found for item {1}. Skipping.", (object)this.GetType().Name, (object)indexable.Item.Uri), (Exception)null);
            continue;
        }

        Item currentVersion;
        using (new SitecoreCachesDisabler())
        {
            currentVersion = latest.Versions.GetVersions(false).FirstOrDefault(); //should only be one version
        }

        // Skip only this language: returning here would abandon every remaining language, the
        // "indexing:updateditem" event and the dependency update below.
        if (currentVersion == null)
        {
            continue;
        }

        // A missing field is treated like an empty one instead of throwing.
        string json = currentVersion.Fields["RawJson"]?.Value;
        if (string.IsNullOrEmpty(json))
        {
            continue;
        }

        // Convert the dates: HH = 24-hour clock, mm = minutes (the previous "mm:hh" had them swapped).
        var dateFormat = new Newtonsoft.Json.Converters.IsoDateTimeConverter
        {
            DateTimeFormat = "yyyy-MM-ddTHH:mm:ss"
        };

        var allFlights = JsonConvert.DeserializeObject<FidsResponse>(json, dateFormat);
        if (allFlights == null || allFlights.Airlines == null)
        {
            continue;
        }

        foreach (var airline in allFlights.Airlines)
        {
            var jsonIndexableItem = (JsonIndexableItem)airline;
            jsonIndexableItem.IndexFieldStorageValueFormatter = context.Index.Configuration.IndexFieldStorageValueFormatter;
            this.Operations.Update((IIndexable)jsonIndexableItem, context, this.index.Configuration);
        }
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:updateditem", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);

    if (!this.DocumentOptions.ProcessDependencies)
    {
        return;
    }

    this.Index.Locator.GetInstance<IEvent>().RaiseEvent("indexing:updatedependents", (object)this.index.Name, (object)indexable.UniqueId, (object)indexable.AbsolutePath);
    this.UpdateDependents(context, indexable);
}
/// <summary>
/// Intentionally empty: clone handling is not applicable to this crawler.
/// </summary>
/// <param name="context">Unused.</param>
/// <param name="versionIndexable">Unused.</param>
private void UpdateClones(IProviderUpdateContext context, SitecoreIndexableItem versionIndexable)
{
// Not applicable. NOTE(review): no caller is visible in this file — confirm whether the method can be removed.
}
/// <summary>
/// Wraps an item as a <c>SitecoreIndexableItem</c>, flags whether it is the latest version, and
/// assigns the index's field storage value formatter.
/// </summary>
/// <param name="item">The item version to wrap.</param>
/// <param name="context">The update context that supplies the index configuration.</param>
/// <returns>The prepared indexable.</returns>
internal SitecoreIndexableItem PrepareIndexableVersion(Item item, IProviderUpdateContext context)
{
    SitecoreIndexableItem prepared = (SitecoreIndexableItem)item;

    var builtinFields = (IIndexableBuiltinFields)prepared;
    builtinFields.IsLatestVersion = item.Versions.IsLatestVersion();

    prepared.IndexFieldStorageValueFormatter = context.Index.Configuration.IndexFieldStorageValueFormatter;
    return prepared;
}
/// <summary>
/// Loads the item addressed by a unique id, with security and Sitecore caches disabled.
/// </summary>
/// <param name="indexableUniqueId">The unique id; treated as a <c>SitecoreItemUniqueId</c>.</param>
/// <returns>The item converted to a <c>SitecoreIndexableItem</c>.</returns>
protected override SitecoreIndexableItem GetIndexable(IIndexableUniqueId indexableUniqueId)
{
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        ItemUri uri = (ItemUri)(indexableUniqueId as SitecoreItemUniqueId);
        Item item = Sitecore.Data.Database.GetItem(uri);
        return (SitecoreIndexableItem)item;
    }
}
/// <summary>
/// Reports whether the item behind an id no longer exists in the crawler's database.
/// </summary>
/// <param name="indexableId">The id to look up; must not be null.</param>
/// <returns>
/// <c>false</c> when the id is not a <c>SitecoreItemId</c>; otherwise <c>true</c> exactly when the
/// database returns no item for it.
/// </returns>
protected override bool GroupShouldBeDeleted(IIndexableId indexableId)
{
    Assert.ArgumentNotNull((object)indexableId, nameof(indexableId));

    var itemId = indexableId as SitecoreItemId;
    if (itemId == null)
    {
        return false;
    }

    IFactory factory = this.Index.Locator.GetInstance<IFactory>();
    Sitecore.Data.Database db = factory.GetDatabase(this.Database);

    Item found;
    using (new SitecoreCachesDisabler())
    {
        found = db.GetItem((ID)itemId);
    }

    return found == null;
}
/// <summary>
/// Loads the item addressed by a unique id and discards it when the requested version number is
/// not among the version numbers of the latest item.
/// </summary>
/// <param name="indexableUniqueId">The unique id; treated as a <c>SitecoreItemUniqueId</c>.</param>
/// <returns>The item converted to a <c>SitecoreIndexableItem</c>; the conversion receives null when the item or its version is gone.</returns>
protected override SitecoreIndexableItem GetIndexableAndCheckDeletes(IIndexableUniqueId indexableUniqueId)
{
    ItemUri itemUri = (ItemUri)(indexableUniqueId as SitecoreItemUniqueId);

    using (new SecurityDisabler())
    {
        Item requested;
        using (new SitecoreCachesDisabler())
        {
            requested = Sitecore.Data.Database.GetItem(itemUri);
        }

        if (requested != null)
        {
            var latestUri = new ItemUri(itemUri.ItemID, itemUri.Language, Version.Latest, itemUri.DatabaseName);
            Item latest = Sitecore.Data.Database.GetItem(latestUri);

            Version[] knownVersions;
            using (new SitecoreCachesDisabler())
            {
                knownVersions = latest.Versions.GetVersionNumbers() ?? new Version[0];
            }

            // The requested version must still be listed; otherwise treat the item as deleted.
            bool versionStillExists = knownVersions.Any(v => v.Number == itemUri.Version.Number);
            if (!versionStillExists)
            {
                requested = null;
            }
        }

        return (SitecoreIndexableItem)requested;
    }
}
/// <summary>
/// Always reports that an update does not require deleting the item from the index first.
/// </summary>
/// <param name="indexable">The item being updated; not inspected.</param>
/// <returns>Always <c>false</c>.</returns>
protected override bool IndexUpdateNeedDelete(SitecoreIndexableItem indexable) => false;
/// <summary>
/// Yields the unique id of the latest remaining version when the deleted version had a higher
/// version number than that latest version.
/// </summary>
/// <param name="indexableUniqueId">The unique id of the deleted version; its Value is read as an <c>ItemUri</c>.</param>
/// <returns>A lazily evaluated sequence with zero or one unique id.</returns>
protected override IEnumerable<IIndexableUniqueId> GetIndexablesToUpdateOnDelete(IIndexableUniqueId indexableUniqueId)
{
    var deletedUri = indexableUniqueId.Value as ItemUri;

    using (new SecurityDisabler())
    {
        var latestUri = new ItemUri(deletedUri.ItemID, deletedUri.Language, Version.Latest, deletedUri.DatabaseName);

        Item latest;
        using (new SitecoreCachesDisabler())
        {
            latest = Sitecore.Data.Database.GetItem(latestUri);
        }

        if (latest == null)
        {
            yield break;
        }

        if (latest.Version.Number >= deletedUri.Version.Number)
        {
            yield break;
        }

        yield return new SitecoreItemUniqueId(latest.Uri);
    }
}
/// <summary>
/// Returns the crawl root as an indexable, reading <c>RootItem</c> with security disabled.
/// </summary>
/// <returns>The root item converted to a <c>SitecoreIndexableItem</c>.</returns>
public override SitecoreIndexableItem GetIndexableRoot()
{
    using (new SecurityDisabler())
    {
        Item crawlRoot = this.RootItem;
        return (SitecoreIndexableItem)crawlRoot;
    }
}
/// <summary>
/// Returns the ids of the children of the given parent.
/// </summary>
/// <param name="parent">The parent whose children are listed.</param>
/// <returns><c>null</c> when the parent has no children; otherwise a lazily evaluated sequence of item ids.</returns>
protected override IEnumerable<IIndexableId> GetIndexableChildrenIds(SitecoreIndexableItem parent)
{
    ChildList children = this.GetChildList(parent.Item);
    if (children.Count == 0)
    {
        return null;
    }

    IEnumerable<Item> items = children;
    IEnumerable<SitecoreItemId> ids =
        from child in items
        select (SitecoreItemId)child.ID;
    return ids;
}
/// <summary>
/// Returns the children of the given parent as indexables.
/// </summary>
/// <param name="parent">The parent whose children are listed.</param>
/// <returns><c>null</c> when the parent has no children; otherwise a lazily evaluated sequence of indexables.</returns>
protected override IEnumerable<SitecoreIndexableItem> GetIndexableChildren(SitecoreIndexableItem parent)
{
    ChildList children = this.GetChildList(parent.Item);
    if (children.Count == 0)
    {
        return null;
    }

    IEnumerable<Item> items = children;
    return
        from child in items
        select (SitecoreIndexableItem)child;
}
/// <summary>
/// Fetches the children of an item with Sitecore caches disabled.
/// </summary>
/// <param name="parent">The item whose children are fetched.</param>
/// <returns>The child list returned by <c>Item.GetChildren</c>.</returns>
protected virtual ChildList GetChildList(Item parent)
{
    // NOTE(review): 5 is a raw ChildListOptions value — presumably a combination of flags;
    // confirm against the Sitecore enum and replace with the named members.
    const ChildListOptions options = (ChildListOptions)5;

    using (new SitecoreCachesDisabler())
    {
        ChildList children = parent.GetChildren(options);
        return children;
    }
}
/// <summary>
/// Loads the latest version of an item in the language matching the given culture, from the root
/// item's database, with security and Sitecore caches disabled.
/// </summary>
/// <param name="indexableId">The item id; treated as a <c>SitecoreItemId</c>.</param>
/// <param name="culture">The culture whose name selects the language.</param>
/// <returns>The item converted to a <c>SitecoreIndexableItem</c>.</returns>
protected override SitecoreIndexableItem GetIndexable(IIndexableId indexableId, CultureInfo culture)
{
    using (new SecurityDisabler())
    using (new SitecoreCachesDisabler())
    {
        Language language = LanguageManager.GetLanguage(culture.Name, this.RootItem.Database);
        ID itemId = (ID)(indexableId as SitecoreItemId);

        // NOTE(review): 1 is a raw SecurityCheck value — confirm which named member it maps to.
        Item item = ItemManager.GetItem(itemId, language, Version.Latest, this.RootItem.Database, (SecurityCheck)1);
        return (SitecoreIndexableItem)item;
    }
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment