Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
ContinueWith Micro Benchmark
/// <summary>
/// Micro benchmark comparing two ways of attaching a clean-up continuation to a task:
/// a lambda that reads the <c>tasks</c> field of this instance directly, and a lambda
/// that receives the dictionary through the <c>state</c> argument of <c>ContinueWith</c>.
/// </summary>
[Config(typeof(Config))]
public class ContinueWithAllocations
{
    /// <summary>
    /// Benchmark configuration that registers a <c>MemoryDiagnoser</c>.
    /// </summary>
    private class Config : ManualConfig
    {
        public Config()
        {
            Add(new MemoryDiagnoser());
        }
    }

    /// <summary>
    /// Number of delay tasks started by a single benchmark invocation.
    /// </summary>
    [Params(100, 200, 500)]
    public int NumberOfTasks { get; set; }

    // Registry of started tasks; each task is stored as both key and value,
    // and is removed again by its own continuation.
    private ConcurrentDictionary<Task, Task> tasks;

    /// <summary>
    /// Creates a fresh, empty task registry.
    /// </summary>
    [Setup]
    public void SetUp()
    {
        tasks = new ConcurrentDictionary<Task, Task>();
    }

    /// <summary>
    /// Starts <see cref="NumberOfTasks"/> one-millisecond delays, registers each one and
    /// attaches a continuation whose lambda reads the <c>tasks</c> field itself.
    /// </summary>
    /// <returns>A task that completes when all tasks found in the registry have completed.</returns>
    [Benchmark]
    public Task ContinueWithClojureCapture()
    {
        for (var remaining = NumberOfTasks; remaining > 0; remaining--)
        {
            var delay = Task.Delay(1);
            tasks.TryAdd(delay, delay);

            // The continuation task is intentionally dropped; only the delay itself is tracked.
#pragma warning disable CS4014 // Call is deliberately not awaited
            delay.ContinueWith(completed =>
            {
                Task ignored;
                tasks.TryRemove(completed, out ignored);
            }, TaskContinuationOptions.ExecuteSynchronously);
#pragma warning restore CS4014 // Call is deliberately not awaited
        }

        var pending = tasks.Values;
        return Task.WhenAll(pending);
    }

    /// <summary>
    /// Starts <see cref="NumberOfTasks"/> one-millisecond delays, registers each one and
    /// attaches a continuation that is handed the registry through the <c>state</c>
    /// argument, so the lambda body uses only its own parameters.
    /// </summary>
    /// <returns>A task that completes when all tasks found in the registry have completed.</returns>
    [Benchmark]
    public Task ContinueWithWithoutClojureCapture()
    {
        for (var remaining = NumberOfTasks; remaining > 0; remaining--)
        {
            var delay = Task.Delay(1);
            tasks.TryAdd(delay, delay);

            // The continuation task is intentionally dropped; only the delay itself is tracked.
#pragma warning disable CS4014 // Call is deliberately not awaited
            delay.ContinueWith((completed, state) =>
            {
                // The registry arrives as the untyped state object supplied below.
                var registry = (ConcurrentDictionary<Task, Task>) state;
                Task ignored;
                registry.TryRemove(completed, out ignored);
            }, tasks, TaskContinuationOptions.ExecuteSynchronously);
#pragma warning restore CS4014 // Call is deliberately not awaited
        }

        var pending = tasks.Values;
        return Task.WhenAll(pending);
    }
}
BenchmarkDotNet=v0.9.7.0
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-3615QM CPU 2.30GHz, ProcessorCount=8
Frequency=2241002 ticks, Resolution=446.2290 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=64-bit RELEASE [RyuJIT]
JitModules=clrjit-v4.6.1080.0

Type=ContinueWithAllocations  Mode=Throughput  
| Method                            | Platform | Jit       | NumberOfTasks | Median        | StdDev      | Gen 0 | Gen 1 | Gen 2 | Bytes Allocated/Op |
|---------------------------------- |--------- |---------- |-------------- |-------------- |------------ |------ |------ |------ |------------------- |
| ContinueWithClojureCapture        | X64      | LegacyJit | 100           | 584.2819 us   | 31.0444 us  | 0,00  | 0,00  | 0,00  | 72 771,83          |
| ContinueWithWithoutClojureCapture | X64      | LegacyJit | 100           | 562.1988 us   | 22.2841 us  | 0,00  | 0,00  | 0,00  | 72 515,91          |
| ContinueWithClojureCapture        | X64      | RyuJit    | 100           | 576.2550 us   | 28.2475 us  | 0,00  | 0,00  | 0,00  | 72 575,64          |
| ContinueWithWithoutClojureCapture | X64      | RyuJit    | 100           | 565.3582 us   | 25.2964 us  | 0,00  | 0,00  | 0,00  | 69 739,98          |
| ContinueWithClojureCapture        | X86      | LegacyJit | 100           | NA            | NA          | -     | -     | -     | NaN                |
| ContinueWithWithoutClojureCapture | X86      | LegacyJit | 100           | NA            | NA          | -     | -     | -     | NaN                |
| ContinueWithClojureCapture        | X64      | LegacyJit | 200           | 931.4733 us   | 52.2089 us  | 0,00  | 0,00  | 0,00  | 108 105,39         |
| ContinueWithWithoutClojureCapture | X64      | LegacyJit | 200           | 893.3913 us   | 45.0929 us  | 0,00  | 0,00  | 0,00  | 92 125,85          |
| ContinueWithClojureCapture        | X64      | RyuJit    | 200           | 907.1329 us   | 44.0440 us  | 0,00  | 0,00  | 0,00  | 108 621,39         |
| ContinueWithWithoutClojureCapture | X64      | RyuJit    | 200           | 927.7449 us   | 59.2315 us  | 0,00  | 0,00  | 0,00  | 107 089,14         |
| ContinueWithClojureCapture        | X86      | LegacyJit | 200           | NA            | NA          | -     | -     | -     | NaN                |
| ContinueWithWithoutClojureCapture | X86      | LegacyJit | 200           | NA            | NA          | -     | -     | -     | NaN                |
| ContinueWithClojureCapture        | X64      | LegacyJit | 500           | 1,848.7510 us | 159.8408 us | 0,00  | 0,00  | 0,00  | 226 353,87         |
| ContinueWithWithoutClojureCapture | X64      | LegacyJit | 500           | 1,838.6481 us | 135.8207 us | 0,00  | 0,00  | 0,00  | 183 727,11         |
| ContinueWithClojureCapture        | X64      | RyuJit    | 500           | 1,892.3359 us | 145.9936 us | 0,00  | 0,00  | 0,00  | 132 275,79         |
| ContinueWithWithoutClojureCapture | X64      | RyuJit    | 500           | 1,834.6669 us | 131.8214 us | 0,00  | 0,00  | 0,00  | 187 833,11         |
| ContinueWithClojureCapture        | X86      | LegacyJit | 500           | NA            | NA          | -     | -     | -     | NaN                |
| ContinueWithWithoutClojureCapture | X86      | LegacyJit | 500           | NA            | NA          | -     | -     | -     | NaN                |

Benchmarks with issues:
- ContinueWithAllocations_ContinueWithClojureCapture_LegacyX86_NumberOfTasks-100
- ContinueWithAllocations_ContinueWithWithoutClojureCapture_LegacyX86_NumberOfTasks-100
- ContinueWithAllocations_ContinueWithClojureCapture_LegacyX86_NumberOfTasks-200
- ContinueWithAllocations_ContinueWithWithoutClojureCapture_LegacyX86_NumberOfTasks-200
- ContinueWithAllocations_ContinueWithClojureCapture_LegacyX86_NumberOfTasks-500
- ContinueWithAllocations_ContinueWithWithoutClojureCapture_LegacyX86_NumberOfTasks-500

@mattwarren
Copy link
Author

mattwarren commented Jun 28, 2016

BenchmarkDotNet=v0.9.7.0
OS=Microsoft Windows NT 6.1.7601 Service Pack 1
Processor=Intel(R) Core(TM) i7-4800MQ CPU 2.70GHz, ProcessorCount=8
Frequency=2630683 ticks, Resolution=380.1294 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=32-bit RELEASE
JitModules=clrjit-v4.6.1076.0

Type=ContinueWithAllocations  Mode=Throughput  
Method NumberOfTasks Median StdDev Gen 0 Gen 1 Gen 2 Bytes Allocated/Op
ContinueWithClojureCapture 100 484.2225 us 22.7719 us 37.17 30.81 - 41,458.70
ContinueWithWithoutClojureCapture 100 482.6122 us 23.7498 us 39.73 30.97 0.29 44,206.16
ContinueWithClojureCapture 200 761.7816 us 29.4267 us 53.99 45.99 4.00 61,399.55
ContinueWithWithoutClojureCapture 200 770.7198 us 27.5776 us 51.09 51.47 1.91 62,713.22
ContinueWithClojureCapture 500 1,484.0111 us 68.0056 us 84.47 117.62 12.63 125,423.96
ContinueWithWithoutClojureCapture 500 1,426.5611 us 66.3615 us 86.00 87.00 10.00 108,241.09

@mattwarren
Copy link
Author

mattwarren commented Jun 28, 2016

image

@danielmarbach
Copy link

danielmarbach commented Jun 28, 2016

@mattwarren Wouldn't you expect ContinueWithWithoutClojureCapture to always have fewer allocations?

@mattwarren
Copy link
Author

mattwarren commented Jun 28, 2016

@danielmarbach, yep!

I suspect that the large number of tasks being created is skewing the memory usage stats. For instance, with NumberOfTasks = 1, 5 and 10, I get the following results:

BenchmarkDotNet=v0.9.7.0
OS=Microsoft Windows NT 6.1.7601 Service Pack 1
Processor=Intel(R) Core(TM) i7-4800MQ CPU 2.70GHz, ProcessorCount=8
Frequency=2630683 ticks, Resolution=380.1294 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=32-bit RELEASE
JitModules=clrjit-v4.6.1076.0

Type=ContinueWithAllocations  Mode=Throughput  
Method NumberOfTasks Median StdDev Gen 0 Gen 1 Gen 2 Bytes Allocated/Op
ContinueWithClojureCapture 1 44.7095 us 2.8989 us 23.32 17.69 - 2,714.27
ContinueWithWithoutClojureCapture 1 46.3050 us 2.5083 us 24.68 14.88 - 2,581.80
ContinueWithClojureCapture 5 100.6687 us 3.3937 us 52.89 36.32 - 6,442.19
ContinueWithWithoutClojureCapture 5 98.7882 us 5.3014 us 51.51 44.11 - 6,812.53
ContinueWithClojureCapture 10 131.7470 us 6.3743 us 78.00 59.00 - 10,302.84
ContinueWithWithoutClojureCapture 10 132.9372 us 9.7847 us 82.84 61.36 0.51 10,675.89

@mattwarren
Copy link
Author

mattwarren commented Jun 28, 2016

For a more general look at lambda allocations (i.e. without the task-based stuff), see https://gist.github.com/mattwarren/480848b4eafb5843602eedd510d9e1ae, which shows the difference more clearly.

@danielmarbach
Copy link

danielmarbach commented Jun 28, 2016

But even in the updated results, ContinueWithWithoutClojureCapture allocates fewer bytes only with 1 task. In all other scenarios ContinueWithClojureCapture seems to allocate less. I have to come up with a better test.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment