-
-
Save CharlieDigital/51e7457a01c5ade7c172771bdaf82e1c to your computer and use it in GitHub Desktop.
// Generate a set of 100 records, each with a random wait interval. | |
using System.Collections.Immutable; | |
using System.Diagnostics; | |
using System.Threading.Channels; | |
// Console logger; in the visible code it is referenced only from a
// commented-out diagnostic line.
Action<object> log = Console.WriteLine;

// 100 work items: each pairs its index with a random delay drawn from [10, 50),
// later passed to Task.Delay / Thread.Sleep (i.e. milliseconds).
var workload = ImmutableArray.CreateRange(
  from i in Enumerable.Range(0, 100)
  select (Index: i, Delay: Random.Shared.Next(10, 50)));
// Using System.Threading.Channels: every work item becomes its own task that
// writes its id into an unbounded channel while a single reader drains it.
await InstrumentedRun("Channel", async () => {
  var channel = Channel.CreateUnbounded<int>();

  // Producer: wait out the item's delay, then publish its id.
  async Task Run(ChannelWriter<int> writer, int id, int delay) {
    await Task.Delay(delay);
    await writer.WriteAsync(id);
  }

  // Consumer: loops until the writer has been completed and nothing is left to read.
  async Task Receive(ChannelReader<int> reader) {
    while (await reader.WaitToReadAsync()) {
      // Drain everything currently buffered before waiting again.
      while (reader.TryRead(out var id)) {
        // No work here.
        //log($" Completed {id}");
      }
    }
  }

  // Start the consumer first so it is already waiting when items arrive.
  var receiveTask = Receive(channel.Reader);

  try {
    // The PLINQ query is enumerated by Task.WhenAll, which starts one Run
    // task per work item and then waits for all of them.
    var processingTasks = workload
      .AsParallel()
      .Select(e => Run(channel.Writer, e.Index, e.Delay));

    await Task.WhenAll(processingTasks);
  } finally {
    // Always complete the writer so Receive can terminate. Using try/finally
    // instead of ContinueWith lets a producer failure propagate to the caller
    // rather than being silently discarded.
    channel.Writer.Complete();
  }

  await receiveTask;
});
// Using Parallel.For with concurrency of 4
await InstrumentedRun("Parallel.For @ 4", () => {
  var options = new ParallelOptions { MaxDegreeOfParallelism = 4 };

  // Bound the loop by the workload itself rather than a duplicated literal,
  // so the range and the indexed collection cannot drift apart.
  Parallel.For(0, workload.Length, options, index => {
    // Parallel.For bodies are synchronous, so the delay is a blocking sleep.
    Thread.Sleep(workload[index].Delay);
  });

  // Parallel.For has already run to completion by the time it returns.
  return Task.CompletedTask;
});
// Parallel.ForEachAsync over the workload, capped at 4 concurrent items.
await InstrumentedRun("Parallel.ForEachAsync @ 4", async () => {
  var options = new ParallelOptions { MaxDegreeOfParallelism = 4 };

  // Each item asynchronously waits for its own delay, honoring the loop's token.
  await Parallel.ForEachAsync(workload, options, async (entry, token) =>
    await Task.Delay(entry.Delay, token));
});
// Parallel.ForEachAsync over the workload, capped at 40 concurrent items.
await InstrumentedRun("Parallel.ForEachAsync @ 40", async () => {
  var options = new ParallelOptions { MaxDegreeOfParallelism = 40 };

  // Each item asynchronously waits for its own delay, honoring the loop's token.
  await Parallel.ForEachAsync(workload, options, async (entry, token) =>
    await Task.Delay(entry.Delay, token));
});
// Parallel.ForEachAsync over the workload with no explicit MaxDegreeOfParallelism.
await InstrumentedRun("Parallel.ForEachAsync (Default)", async () => {
  // Each item asynchronously waits for its own delay, honoring the loop's token.
  await Parallel.ForEachAsync(workload, async (entry, token) =>
    await Task.Delay(entry.Delay, token));
});
/*----------------------------------------------------------- | |
* Supporting functions | |
---------------------------------------------------------*/ | |
// Runs `test` to completion, then prints the elapsed wall-clock time together
// with the process's thread count sampled before and after the run.
//   name: label printed in the result header.
//   test: the asynchronous workload to measure; any exception it throws propagates.
async Task InstrumentedRun(string name, Func<Task> test) {
  // Process is IDisposable: use a single instance and release it on exit
  // instead of creating (and leaking) a new one for every sample.
  using var process = Process.GetCurrentProcess();
  var threadsAtStart = process.Threads.Count;

  var timer = Stopwatch.StartNew();
  await test();
  timer.Stop();

  // A Process instance caches its information; refresh so the second sample
  // reflects the state after the run rather than the first snapshot.
  process.Refresh();
  var threadsAtEnd = process.Threads.Count;

  Console.WriteLine($"[{name}] = {timer.ElapsedMilliseconds}ms");
  Console.WriteLine($" ⮑ {threadsAtStart} threads at start");
  Console.WriteLine($" ⮑ {threadsAtEnd} threads at end");
}
/* | |
YMMV since each run uses a random workload. | |
[Channel] = 68ms | |
⮑ 8 threads at start | |
⮑ 19 threads at end | |
[Parallel.For @ 4] = 799ms | |
⮑ 19 threads at start | |
⮑ 19 threads at end | |
[Parallel.ForEachAsync @ 4] = 754ms | |
⮑ 19 threads at start | |
⮑ 19 threads at end | |
[Parallel.ForEachAsync @ 40] = 100ms | |
⮑ 19 threads at start | |
⮑ 19 threads at end | |
[Parallel.ForEachAsync (Default)] = 384ms | |
⮑ 19 threads at start | |
⮑ 19 threads at end | |
*/ |
That is interesting. I did verify your findings, with
dotnet publish -c Release -r linux-x64 --self-contained -p:PublishNativeAot=true /p:PublishSingleFile=true
[Channel] = 114ms
⮑ 7 threads at start
⮑ 14 threads at end
[Parallel.For @ 4] = 966ms
⮑ 7 threads at start
⮑ 13 threads at end
[Parallel.ForEachAsync @ 4] = 1034ms
⮑ 7 threads at start
⮑ 14 threads at end
[Parallel.ForEachAsync @ 40] = 164ms
⮑ 7 threads at start
⮑ 14 threads at end
[Parallel.ForEachAsync (Default)] = 1022ms
⮑ 7 threads at start
⮑ 14 threads at end
@AliveDevil it's the initialization of the `Channel` instance. Once it's taken out of the instrumented scope, the numbers match up again. (I don't know the underlying reason why the instantiation of the `Channel` would be different in this case.)
Processor: Intel Core i5-13600K (14 cores, 20 logical processors); OS: Windows 11; .NET 7.0.13.
The number of threads does not match the example at all
[Channel] = 78ms
8 threads at start
30 threads at end
[Parallel.For @ 4] = 974ms
30 threads at start
30 threads at end
[Parallel.ForEachAsync @ 4] = 919ms
30 threads at start
30 threads at end
[Parallel.ForEachAsync @ 40] = 113ms
30 threads at start
31 threads at end
[Parallel.ForEachAsync (Default)] = 210ms
31 threads at start
31 threads at end
@nickgrishaev I wouldn't expect it to since the thread allocation has the # of cores as a factor and I'm on a Mac M1.
With AOT on .NET 8 Preview (.NET 7 does not support it on macOS).
Release + AOT + Single File + Trimmed:
Release only + Single File:
Release + AOT + Single File:
Release + AOT + Trimmed:
It seems that the trimming somehow negatively affects the `Channel` (rather than the `Parallel` options being optimized).