Skip to content

Instantly share code, notes, and snippets.

@Arithmomaniac
Created August 8, 2017 21:47
Show Gist options
  • Save Arithmomaniac/2a36321ae2e613b87f84a5749ef88c82 to your computer and use it in GitHub Desktop.
Save Arithmomaniac/2a36321ae2e613b87f84a5749ef88c82 to your computer and use it in GitHub Desktop.
GroupBatch naive implementation
public static class GroupBatchExtension
{
/// <summary>A simple IGrouping implementation used by GroupBatch.</summary>
private class GroupBatchGrouping<TKey, TElement> : List<TElement>, IGrouping<TKey, TElement>
{
public GroupBatchGrouping(int rank, TKey key, IEnumerable<TElement> values):base(values)
{
Key = key;
Rank = rank;
}
public TKey Key { get; private set; }
//an actual implementation would not keep this inside of the returned object.
internal int Rank { get; private set; }
}
/// <summary>
/// Yields items in batches of at most a specific size, where each batch shares a key in common
/// </summary>
/// <typeparam name="TKey">The type of the key for the batch</typeparam>
/// <typeparam name="TSource">The type of the enumerable to batch</typeparam>
/// <param name="source">THe source enumerable</param>
/// <param name="keySelector">The selector to determine the key from the items</param>
/// <param name="size">The maximum number of items in a batch.</param>
/// <returns>A stream of batches of the method's size, followed by any "incomplete" batches in order
/// of when that batch was created.</returns>
public static IEnumerable<IGrouping<TKey, TSource>> GroupBatch<TKey, TSource>(this IEnumerable<TSource> source, Func<TSource, TKey> keySelector, int size)
{
if (source == null) throw new ArgumentNullException(nameof(source));
if (size <= 0) throw new ArgumentOutOfRangeException(nameof(size));
if (keySelector == null) throw new ArgumentNullException(nameof(keySelector));
var groupingDict = new Dictionary<TKey, GroupBatchGrouping<TKey, TSource>>();
int i = 0;
foreach (var item in source)
{
var key = keySelector(item);
//ensure the batch exists for the given item
GroupBatchGrouping<TKey, TSource> grouping;
if (groupingDict.TryGetValue(key, out grouping))
{
groupingDict[key] = grouping = new GroupBatchGrouping<TKey, TSource>(i, key, new List<TSource>());
i++;
}
//add the item for that key to the batch in progress
grouping.Add(item);
//yield the batch in progress for that key if full
if (grouping.Count == size)
{
yield return grouping;
groupingDict.Remove(key);
}
}
//yield remaining batches in ascending order of creation
foreach (var grouping in groupingDict.Values.OrderBy(x => x.Rank)
{
yield return grouping;
}
}
}
[TestClass]
public class GroupBatchExtension
{
[TestMethod]
public void GroupBatch()
{
var itemsToGroup = new[] { "AA", "BA", "BB", "AB", "AC", "BC", "CA", "CB", "BD", "BE" };
var batches = EnumerableExtension.GroupBatch(itemsToGroup, x => x[0], 2).ToList();
//expect five batches.
//first, expect one with key of "B", because hits batch size of two first.
//then one of "A", because does second.
//then one of "C", because does third.
//then we hit "A" again.
//Nothing hits batch size again, so we then look at when their batch size was reset.
//We skip C's batch, because it is empty.
// Since "A"'s current batch was reset longer ago then "B" was, "A" goes first, then "B". (even though
//B was the first batch reset overall).
Assert.AreEqual(6, batches.Count);
Assert.AreEqual('B', batches[0].Key);
CollectionAssert.AreEqual((List<string>)batches[0], new[] { "BA", "BB" });
Assert.AreEqual('A', batches[1].Key);
CollectionAssert.AreEqual((List<string>)batches[1], new[] { "AA", "AB" });
Assert.AreEqual('C', batches[2].Key);
CollectionAssert.AreEqual((List<string>)batches[2], new[] { "CA", "CB" });
Assert.AreEqual('B', batches[3].Key);
CollectionAssert.AreEqual((List<string>)batches[3], new[] { "BC", "BD" });
Assert.AreEqual('A', batches[4].Key);
CollectionAssert.AreEqual((List<string>)batches[4], new[] { "AC" });
Assert.AreEqual('B', batches[5].Key);
CollectionAssert.AreEqual((List<string>)batches[5], new[] { "BE" });
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment