Skip to content

Instantly share code, notes, and snippets.

@madelson
Created March 31, 2022 11:14
Show Gist options
  • Save madelson/ec76bc142b6c015a38ed077e80eda3d0 to your computer and use it in GitHub Desktop.
Save madelson/ec76bc142b6c015a38ed077e80eda3d0 to your computer and use it in GitHub Desktop.
Chunk algorithm alternatives for https://github.com/dotnet/runtime/issues/67132
using BenchmarkDotNet.Attributes;
using System;
using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Benchmarks
{
[MemoryDiagnoser]
public class ChunkBenchmark
{
[Params("1000,100", "1000,999", "10000,999", "1000,1000000")]
public string Input = default!;
private int Size;
private int ChunkSize;
private IEnumerable<string> _source = default!;
[GlobalSetup]
public void SetUp()
{
var split = Input.Split(',');
Size = int.Parse(split[0]);
ChunkSize = int.Parse(split[1]);
_source = Enumerable.Range(0, Size).Select(i => i.ToString()).ToList();
// validate that all methods do the same thing
var r1 = _source.Chunk(ChunkSize).ToArray();
var r2 = ChunkList(_source, ChunkSize).ToArray();
var r3 = ChunkFirst(_source, ChunkSize).ToArray();
if (r1.Length != r2.Length || r1.Length != r3.Length) { throw new Exception("Bad length"); }
for (var i = 0; i < r1.Length; ++i)
{
if (!r1[i].SequenceEqual(r2[i]) || !r1[i].SequenceEqual(r3[i])) { throw new Exception("Bad chunk"); }
}
}
[Benchmark(Baseline = true)]
public int ChunkNative()
{
var count = 0;
foreach (var chunk in _source.Chunk(ChunkSize))
{
++count;
}
return count;
}
[Benchmark]
public int ChunkList()
{
var count = 0;
foreach (var chunk in ChunkList(_source, ChunkSize))
{
++count;
}
return count;
}
[Benchmark]
public int ChunkFirst()
{
var count = 0;
foreach (var chunk in ChunkFirst(_source, ChunkSize))
{
++count;
}
return count;
}
public static IEnumerable<TSource[]> ChunkList<TSource>(IEnumerable<TSource> source, int size)
{
if (source == null)
{
ArgumentNullException.ThrowIfNull(source);
}
if (size < 1)
{
ArgumentNullException.ThrowIfNull(source);
}
return ChunkListIterator(source, size);
}
private static IEnumerable<TSource[]> ChunkListIterator<TSource>(IEnumerable<TSource> source, int size)
{
using IEnumerator<TSource> e = source.GetEnumerator();
if (e.MoveNext())
{
List<TSource> chunkBuilder = new();
do
{
chunkBuilder.Add(e.Current);
while (chunkBuilder.Count < size && e.MoveNext())
{
chunkBuilder.Add(e.Current);
}
TSource[] chunk = new TSource[chunkBuilder.Count];
chunkBuilder.CopyTo(chunk, 0);
yield return chunk;
if (chunk.Length < size)
{
yield break;
}
chunkBuilder.Clear();
}
while (e.MoveNext());
}
}
public static IEnumerable<TSource[]> ChunkFirst<TSource>(IEnumerable<TSource> source, int size)
{
if (source == null)
{
ArgumentNullException.ThrowIfNull(source);
}
if (size < 1)
{
ArgumentNullException.ThrowIfNull(source);
}
return ChunkFirstIterator(source, size);
}
private static IEnumerable<TSource[]> ChunkFirstIterator<TSource>(IEnumerable<TSource> source, int size)
{
using IEnumerator<TSource> e = source.GetEnumerator();
//if (size > 1000)
{
if (TryCreateInitialChunk(e, size, out TSource[]? chunk))
{
yield return chunk;
if (chunk.Length < size)
{
yield break;
}
}
else
{
yield break;
}
}
while (e.MoveNext())
{
TSource[] chunk = new TSource[size];
chunk[0] = e.Current;
int i = 1;
for (; i < chunk.Length && e.MoveNext(); i++)
{
chunk[i] = e.Current;
}
if (i == chunk.Length)
{
yield return chunk;
}
else
{
Array.Resize(ref chunk, i);
yield return chunk;
yield break;
}
}
}
private static bool TryCreateInitialChunk<TSource>(IEnumerator<TSource> enumerator, int size, [NotNullWhen(returnValue: true)] out TSource[]? chunk)
{
List<TSource> builder = new();
for (var i = 0; i < size && enumerator.MoveNext(); i++)
{
builder.Add(enumerator.Current);
}
if (builder.Count == 0)
{
chunk = null;
return false;
}
chunk = builder.ToArray();
return true;
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment