Skip to content

Instantly share code, notes, and snippets.

@josheinstein
Last active August 29, 2015 14:19
Show Gist options
  • Save josheinstein/a89a75e869d06a4ad1f4 to your computer and use it in GitHub Desktop.
Save josheinstein/a89a75e869d06a4ad1f4 to your computer and use it in GitHub Desktop.
When doing batch operations in Azure tables, you gotta be sure all entities are in the same partition. Here's some helpful, generally useful code for taking any IEnumerable of table entities and turning it into an IEnumerable of BATCHES of table entities. Nerd boner!
internal static class ExtensionMethods
{
    /// <summary>
    /// The largest batch size allowed by Azure table storage.
    /// </summary>
    public const int MaxBatchSize = 100;

    /// <summary>
    /// Given a sequence of <see cref="ITableEntity"/> objects, returns them in batches of up to
    /// <paramref name="batchSize"/> entities at a time, where every entity in a batch shares the same
    /// partition key, so that they can be efficiently used in Azure table batch operations.
    /// </summary>
    /// <typeparam name="T">The entity type.</typeparam>
    /// <param name="source">The collection of entities to lump into batches.</param>
    /// <param name="sort">
    /// If true, the collection is first sorted by partition key (ordinal comparison). If the collection is
    /// already grouped by partition key, pass false to avoid buffering and sorting it.
    /// </param>
    /// <param name="batchSize">The maximum number of entities to include in a batch, from 1 up to <see cref="MaxBatchSize"/>.</param>
    /// <returns>
    /// A lazily evaluated collection of arrays, with up to <paramref name="batchSize"/> entities in each array.
    /// An empty <paramref name="source"/> produces no batches.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="batchSize"/> is less than 1 or greater than <see cref="MaxBatchSize"/>.
    /// </exception>
    /// <remarks>
    /// Arguments are validated immediately when the method is called; only the batching itself is deferred
    /// until the result is enumerated. When <paramref name="sort"/> is false, a new batch is started every
    /// time the partition key changes, so entities with the same key that are not adjacent in
    /// <paramref name="source"/> end up in separate batches.
    /// </remarks>
    public static IEnumerable<T[]> BatchByPartitionKey<T>( this IEnumerable<T> source, bool sort = true, int batchSize = MaxBatchSize ) where T : ITableEntity
    {
        // Validate here rather than inside the iterator: an iterator body does not run until the
        // first MoveNext, which would delay these exceptions until the caller starts enumerating.
        if ( source == null ) { throw new ArgumentNullException( "source" ); }
        if ( batchSize > MaxBatchSize || batchSize < 1 ) {
            throw new ArgumentOutOfRangeException( "batchSize", batchSize, "batchSize must be between 1 and " + MaxBatchSize + "." );
        }

        return BatchByPartitionKeyIterator( source, sort, batchSize );
    }

    /// <summary>
    /// Iterator that performs the actual batching for <see cref="BatchByPartitionKey{T}"/>.
    /// Expects arguments that have already been validated.
    /// </summary>
    private static IEnumerable<T[]> BatchByPartitionKeyIterator<T>( IEnumerable<T> source, bool sort, int batchSize ) where T : ITableEntity
    {
        if ( sort ) {
            // Sort ordinally so the ordering agrees with the ordinal (!=) comparison used below to
            // detect a partition change. A culture-sensitive sort can treat ordinally different keys
            // as equal and leave them interleaved, which would fragment the batches.
            source = source.OrderBy( x => x.PartitionKey, StringComparer.Ordinal );
        }

        var batch = new List<T>( batchSize );
        string batchKey = null;

        foreach ( var item in source ) {
            string itemKey = item.PartitionKey;

            // Flush the current batch when the partition key changes or the batch is full.
            if ( batch.Count > 0 && ( itemKey != batchKey || batch.Count == batchSize ) ) {
                yield return batch.ToArray( );
                batch.Clear( );
            }

            batchKey = itemKey;
            batch.Add( item );
        }

        // Flush the final, possibly partial, batch.
        if ( batch.Count > 0 ) {
            yield return batch.ToArray( );
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment