Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Demonstrating potential side effects (multiple evaluation) due to fast partitioning method from Stack Overflow answer http://stackoverflow.com/a/13745058/1037948

Explanation

After using the suggested Partition solution on a set of data that had calculations applied to each element before being enumerated (see scenarios below), I noticed the application took much longer. Investigated via tests.cs and discovered that each item will be evaluated twice. See differences in sideeffect in results.

Original Scenario

var items = database.Fetch(filter).ToList();
var calculatedStuff = items.Select(item => new CalculatedItem(item));
// other stuff
var file = new SomeFileHelperThing("filename");
foreach(var item in calculatedStuff) file.SerializeAndAppend(item);

Partitioned Scenario

var items = database.Fetch(filter).ToList();
var calculatedStuff = items.Select(item => new CalculatedItem(item));
// other stuff
int i = 0;
foreach(var group in calculatedStuff.Partition(100)) {	// <-- enumerates `calculatedStuff`, creating a `CalculatedItem` for each, before returning each group
	var file = new SomeFileHelperThing("filename" + ++i);
	foreach(var item in group) file.SerializeAndAppend(item);  // <-- enumerates the group, again creating a `CalculatedItem` for each
}

Results

Also note that inner/outer enumeration affects the results...

Linear

result = 132, sideeffect = 12

Nawfal Partition

result = 132, sideeffect = 39

Safe Partition

result = 132, sideeffect = 12

Split

result = 132, sideeffect = 12

Segment

result = 132, sideeffect = 12

Chunks

result = 132, sideeffect = 12

void Main()
{
// investigating side effects + performance of some suggestions
// * http://stackoverflow.com/questions/3773403/linq-partition-list-into-lists-of-8-members
// * http://stackoverflow.com/questions/419019/split-list-into-sublists-with-linq
// * http://stackoverflow.com/questions/438188/split-a-collection-into-n-parts-with-linq
// examine side effects
int sideEffect = 0, result = 0, size = 5;
var list = Enumerable.Range(0, (int)Math.Round(size*2.5)).Select(i => {
sideEffect++; // just to mark how many times this occurs
return i * 2; // simulate some delayed work
});
// control case
sideEffect = 0;
result = list.Sum();
string.Format("result = {0}, sideeffect = {1}", result, sideEffect).Dump("Linear");
// compare results -- some mentions that actually evaluating may screw with ordering...
var results = new Dictionary<string, IEnumerable<IEnumerable<int>>>() {
{ "Nawfal Partition", list.Partition(size) }
, { "Safe Partition", list.SafePartition(size) }
, { "Split", list.Split(size) }
, { "Segment", list.Segment(size) }
, { "Chunks", list.Chunks(size) }
};
foreach(var r in results) {
sideEffect = 0;
result = r.Value.Select(g => g.Sum()).Sum();
string.Format("result = {0}, sideeffect = {1}", result, sideEffect).Dump(r.Key);
}
Util.HorizontalRun(string.Join(",", results.Keys), results.Values).Dump("Results");
Util.HorizontalRun(string.Join(",", results.Keys), results.Values.Select(r => r.ToList()).ToArray()).Dump("Enumerated Results");
Util.HorizontalRun(string.Join(",", results.Keys), results.Values.Select(r => r.Select(o => o.ToArray()).ToArray()).ToArray()).Dump("Inner-Enumerated Results");
Util.HorizontalRun(string.Join(",", results.Keys), results.Values.Select(r => r.ToArray().Select(o => o.ToArray())).ToArray()).Dump("Outer-Enumerated Results");
}
public static class CollectionExtensions {
// http://stackoverflow.com/a/13745058/1037948 nawfal
public static IEnumerable<IEnumerable<T>> Partition<T>(this IEnumerable<T> items,
int partitionSize)
{
if (partitionSize <= 0)
throw new ArgumentOutOfRangeException("partitionSize");
int innerListCounter = 0;
int numberOfPackets = 0;
foreach (var item in items)
{
innerListCounter++;
if (innerListCounter == partitionSize)
{
yield return items.Skip(numberOfPackets * partitionSize).Take(partitionSize);
innerListCounter = 0;
numberOfPackets++;
}
}
if (innerListCounter > 0)
yield return items.Skip(numberOfPackets * partitionSize);
}
public static IEnumerable<IEnumerable<T>> SafePartition<T>(this IEnumerable<T> items,
int partitionSize)
{
int i = 0;
return items.GroupBy(x => i++ / partitionSize);
}
// modified from http://stackoverflow.com/a/28682295/1037948
public static IEnumerable<IEnumerable<T>> Split<T>(this IEnumerable<T> items, int partitionSize) {
if (partitionSize < 1) throw new ArgumentException("Must be positive", "partitionSize");
using (var enumer = items.GetEnumerator())
while (enumer.MoveNext())
{
// weird -- if you don't enumerate the inner list on consumption, it will return a list of each original item as an enumerable using this line...
//yield return Take(enumer.Current, enumer, partitionSize);
// but if you use the following, without enumerating the inner list on consumption it will return the appropriate number of chunks with 1 item each (the first from that segment)
var remaining = partitionSize;
yield return Take(enumer.Current, enumer, () => --remaining == 0);
// must discard elements skipped by inner iterator so that `.ToList()` (rather than `.Select(o => o.ToList()).ToList()`) doesn't return n segments instead
while (--remaining > 0 && enumer.MoveNext()) { }
}
}
private static IEnumerable<T> Take<T>(T head, IEnumerator<T> tail, int len) {
return Take(head, tail, () => --len == 0);
}
private static IEnumerable<T> Take<T>(T head, IEnumerator<T> tail, Func<bool> stop) {
while (true) {
yield return head;
if (stop()) break;
if (tail.MoveNext()) head = tail.Current;
else break;
}
}
// http://stackoverflow.com/a/4835541/1037948
private static IEnumerable<T> TakeFromCurrent<T>(this IEnumerator<T> enumerator, int count) {
while (count > 0) {
yield return enumerator.Current;
if (--count > 0 && !enumerator.MoveNext()) yield break;
}
}
public static IEnumerable<IEnumerable<T>> Segment<T>(this IEnumerable<T> seq, int partitionSize) {
var enumerator = seq.GetEnumerator();
while (enumerator.MoveNext()) {
yield return enumerator.TakeFromCurrent(partitionSize);
}
}
// http://stackoverflow.com/a/20953521/1037948
public static IEnumerable<IEnumerable<T>> Chunks<T>(this IEnumerable<T> enumerable, int chunkSize) {
if (chunkSize < 1) throw new ArgumentException("chunkSize must be positive");
using (var e = enumerable.GetEnumerator())
while (e.MoveNext()) {
var remaining = chunkSize; // elements remaining in the current chunk
var innerMoveNext = new Func<bool>(() => --remaining > 0 && e.MoveNext());
yield return e.GetChunk(innerMoveNext);
while (innerMoveNext()) {/* discard elements skipped by inner iterator */}
}
}
private static IEnumerable<T> GetChunk<T>(this IEnumerator<T> e, Func<bool> innerMoveNext) {
do yield return e.Current;
while (innerMoveNext());
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.