Skip to content

Instantly share code, notes, and snippets.

@jbuedel
Created June 4, 2011 04:28
Show Gist options
  • Save jbuedel/1007581 to your computer and use it in GitHub Desktop.
Save jbuedel/1007581 to your computer and use it in GitHub Desktop.
Josh's Submission To The Linq Challenge
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
public class Class1
{
// This first attempt is simply a straight port to linq. I've kept the original optimization
// that short circuited on the first matching id.
static IList<ComponentInfo> ProcessWordComponentAndUploadInfos_ByJosh_Attempt1(
DocumentCheckinJobInfo jobInfo,
List<ComponentInfo> componentInfos)
{
if (!jobInfo.IsNewDocument)
{
var e = from dc_group in jobInfo.Document.DocumentComponentGroups
from dc in dc_group.DocumentComponents
// Any() will short circuit on the first match
where !componentInfos.Any(x => x.ComponentId == dc.Id)
select new ComponentInfo
{
ComponentTitle = dc.Title,
ComponentId = dc.Id,
RequestJobDocumentUploadItemStatusType = Business.RequestJobDocumentUploadItemStatusType.Deleted
};
return e.ToList();
}
return new List<ComponentInfo>();
}
// Swapped out the full scan of componentInfos for a binary search (which is log(n) iirc).
// This will only help if we can expect componentInfos to have more than a handful of items.
static IList<ComponentInfo> ProcessWordComponentAndUploadInfos_ByJosh_Attempt2(
DocumentCheckinJobInfo jobInfo,
List<ComponentInfo> componentInfos)
{
if (!jobInfo.IsNewDocument)
{
// Extract the ComponentIds into a sorted list.
var id_list = (from ci in componentInfos
orderby ci.ComponentId
select ci.ComponentId).ToList();
var e = from dc_group in jobInfo.Document.DocumentComponentGroups
from dc in dc_group.DocumentComponents
// because the list is sorted we can use a fast BinarySearch
where id_list.BinarySearch(dc.Id) < 0 // assuming default IComparer for Ids is satisfactory.
select new ComponentInfo
{
ComponentTitle = dc.Title,
ComponentId = dc.Id,
RequestJobDocumentUploadItemStatusType = Business.RequestJobDocumentUploadItemStatusType.Deleted
};
return e.ToList();
}
return new List<ComponentInfo>();
}
// My next idea would be to explore parallelism. But that's only going to help if we're talking about
// big sequences. Like in the 1000s or 10,000s. Anything shorter and we'd probably just waste the time task switching.
// This violates the constraints of your problem (answer in linq), but if we're still not fast enough
// then I'd say these ComponentInfos look like something stored in a table. Make a single query against that
// table to return all the missing ids in one fell swoop.
//
// select Id from ComponentInfo where Id not in (<list of ids from all ComponentInfos in jobInfo>)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment