Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save meziantou/67735a6837de619506990ac720fa548c to your computer and use it in GitHub Desktop.
Save meziantou/67735a6837de619506990ac720fa548c to your computer and use it in GitHub Desktop.
using System.Buffers;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
using System.Text;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
using BenchmarkDotNet.Toolchains.CsProj;
using BenchmarkDotNet.Toolchains.DotNetCli;
// Program entry point (top-level statement): hands the ReplacePlusWithSpaceBenchmark class
// to BenchmarkDotNet's runner.
BenchmarkRunner.Run<ReplacePlusWithSpaceBenchmark>();
/// <summary>
/// Benchmarks several implementations that copy a string while turning every '+' into ' '.
/// The SSE-based helper is the baseline; the two Vector128/Vector256 helpers are compared against it.
/// </summary>
[DisassemblyDiagnoser(printSource: true, maxDepth: 10)]
[MemoryDiagnoser]
[ReturnValueValidator]
[Config(typeof(CustomPathsConfig))]
public class ReplacePlusWithSpaceBenchmark
{
    /// <summary>
    /// Configuration that adds one x64 RyuJIT job using a toolchain built from a custom dotnet CLI path.
    /// </summary>
    public class CustomPathsConfig : ManualConfig
    {
        public CustomPathsConfig()
        {
            var settings = new NetCoreAppSettings("net7.0", null, ".NET 7.0.100-preview.6.22307.20");
            var previewSdk = settings.WithCustomDotNetCliPath(@"C:\Users\mezia\Downloads\dotnet-sdk-7.0.100-preview.6.22307.20-win-x64\dotnet.exe");
            var toolchain = CsProjCoreToolchain.From(previewSdk);
            AddJob(Job.RyuJitX64.WithToolchain(toolchain));
        }
    }

    /// <summary>Input string for the current benchmark case; populated from <see cref="ValueSource"/>.</summary>
    [ParamsSource(nameof(ValueSource))]
    public string Value { get; set; } = null!;

    /// <summary>
    /// Yields the benchmark inputs: strings of 'a' in which every fifth character (index 0, 5, 10, ...) is '+'.
    /// </summary>
    public IEnumerable<string> ValueSource
    {
        get
        {
            // Lengths 0, 8, 16, ..., 248.
            for (var length = 0; length < 256; length += 8)
            {
                yield return CreateString(length);
            }

            // Lengths 3, 6, ..., 99, skipping multiples of 8 (the first loop already produced those).
            for (var length = 0; length < 100; length += 3)
            {
                if (length % 8 == 0)
                {
                    continue;
                }

                yield return CreateString(length);
            }

            static string CreateString(int length)
            {
                var chars = new char[length];
                for (var index = 0; index < chars.Length; index++)
                {
                    chars[index] = index % 5 == 0 ? '+' : 'a';
                }

                return new string(chars);
            }
        }
    }

    // Disabled alternatives, kept for reference:
    //[Benchmark(Baseline = true)]
    //public string Basic() => BasicHelper.ReplacePlusWithSpace(Value);
    //[Benchmark]
    //public string StringReplace() => Value.Replace('+', ' ');

    [Benchmark(Baseline = true)]
    public string Current() => Vector128Helper_Sse.ReplacePlusWithSpace(Value);

    [Benchmark]
    public string Vector128() => Vector128Helper.ReplacePlusWithSpace(Value);

    [Benchmark]
    public string Vector256() => Vector256Helper.ReplacePlusWithSpace(Value);
}
/// <summary>
/// Scalar implementation: produces a copy of the input in which every '+' is replaced by ' '.
/// </summary>
public static class BasicHelper
{
    // Cached delegate so string.Create does not need a new delegate instance per call.
    private static readonly SpanAction<char, IntPtr> s_replacePlusWithSpace = ReplacePlusWithSpaceCore;

    /// <summary>Returns a new string equal to <paramref name="span"/> with each '+' turned into a space.</summary>
    /// <param name="span">Characters to copy.</param>
    /// <returns>A string of the same length as <paramref name="span"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static unsafe string ReplacePlusWithSpace(ReadOnlySpan<char> span)
    {
        // The source stays pinned for the whole string.Create call; its address travels as the state argument.
        fixed (char* source = &MemoryMarshal.GetReference(span))
        {
            return string.Create(span.Length, (IntPtr)source, s_replacePlusWithSpace);
        }
    }

    /// <summary>Fills <paramref name="buffer"/> from the source pointer carried in <paramref name="state"/>.</summary>
    private static unsafe void ReplacePlusWithSpaceCore(Span<char> buffer, IntPtr state)
    {
        fixed (char* destination = &MemoryMarshal.GetReference(buffer))
        {
            var src = (ushort*)state.ToPointer();
            var dst = (ushort*)destination;
            nint length = (nint)(uint)buffer.Length;
            nint index = 0;

            while (index < length)
            {
                var current = src[index];
                dst[index] = current == '+' ? (ushort)' ' : current;
                index++;
            }
        }
    }
}
/// <summary>
/// SSE implementation: replaces '+' with ' ' using SSE2 compare/load/store and SSE4.1 blend on
/// 128-bit blocks, with a scalar loop for whatever does not fill a whole block.
/// </summary>
public static class Vector128Helper_Sse
{
    // Cached delegate so string.Create does not need a new delegate instance per call.
    private static readonly SpanAction<char, IntPtr> s_replacePlusWithSpace = ReplacePlusWithSpaceCore;

    /// <summary>Returns a new string equal to <paramref name="span"/> with each '+' turned into a space.</summary>
    /// <param name="span">Characters to copy.</param>
    /// <returns>A string of the same length as <paramref name="span"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static unsafe string ReplacePlusWithSpace(ReadOnlySpan<char> span)
    {
        // The source stays pinned for the whole string.Create call; its address travels as the state argument.
        fixed (char* source = &MemoryMarshal.GetReference(span))
        {
            return string.Create(span.Length, (IntPtr)source, s_replacePlusWithSpace);
        }
    }

    /// <summary>Fills <paramref name="buffer"/> from the source pointer carried in <paramref name="state"/>.</summary>
    private static unsafe void ReplacePlusWithSpaceCore(Span<char> buffer, IntPtr state)
    {
        fixed (char* destination = &MemoryMarshal.GetReference(buffer))
        {
            var src = (ushort*)state.ToPointer();
            var dst = (ushort*)destination;
            nint length = (nint)(uint)buffer.Length;
            nint index = 0;

            if (Sse41.IsSupported && length >= Vector128<ushort>.Count)
            {
                var plus = Vector128.Create((ushort)'+');
                var space = Vector128.Create((ushort)' ');

                // Highest offset at which a full block still fits inside the buffer.
                nint lastBlockStart = length - Vector128<ushort>.Count;
                do
                {
                    var block = Sse2.LoadVector128(src + index);
                    var isPlus = Sse2.CompareEqual(block, plus);

                    // Take the space where the comparison matched, the original element elsewhere.
                    Sse2.Store(dst + index, Sse41.BlendVariable(block, space, isPlus));
                    index += Vector128<ushort>.Count;
                }
                while (index <= lastBlockStart);
            }

            // Scalar tail (or the whole input when the vector path was not taken).
            while (index < length)
            {
                var current = src[index];
                dst[index] = current == '+' ? (ushort)' ' : current;
                index++;
            }
        }
    }
}
/// <summary>
/// Cross-platform Vector128 implementation: replaces '+' with ' ' on 128-bit blocks using the
/// generic <see cref="Vector128"/> API, with a scalar loop for whatever does not fill a whole block.
/// </summary>
public static class Vector128Helper
{
    // Cached delegate so string.Create does not need a new delegate instance per call.
    private static readonly SpanAction<char, IntPtr> s_replacePlusWithSpace = ReplacePlusWithSpaceCore;

    /// <summary>Returns a new string equal to <paramref name="span"/> with each '+' turned into a space.</summary>
    /// <param name="span">Characters to copy.</param>
    /// <returns>A string of the same length as <paramref name="span"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static unsafe string ReplacePlusWithSpace(ReadOnlySpan<char> span)
    {
        // The source stays pinned for the whole string.Create call; its address travels as the state argument.
        fixed (char* ptr = &MemoryMarshal.GetReference(span))
        {
            return string.Create(span.Length, (IntPtr)ptr, s_replacePlusWithSpace);
        }
    }

    /// <summary>
    /// Copies <c>buffer.Length</c> UTF-16 code units from the address in <paramref name="state"/>
    /// into <paramref name="buffer"/>, writing ' ' wherever the source holds '+'.
    /// </summary>
    /// <param name="buffer">Destination to fill.</param>
    /// <param name="state">Address of the source characters; must reference at least <c>buffer.Length</c> chars.</param>
    public static unsafe void ReplacePlusWithSpaceCore(Span<char> buffer, IntPtr state)
    {
        fixed (char* ptr = &MemoryMarshal.GetReference(buffer))
        {
            var input = (ushort*)state.ToPointer();
            var output = (ushort*)ptr;
            var i = (nint)0;
            var n = (nint)(uint)buffer.Length;

            // Only take the vector path when 128-bit operations are hardware accelerated; otherwise the
            // Vector128 API falls back to software emulation and the scalar loop below is the better choice.
            // This mirrors the Sse41.IsSupported / IsHardwareAccelerated guards of the sibling helpers.
            if (Vector128.IsHardwareAccelerated && n >= Vector128<ushort>.Count)
            {
                var vecPlus = Vector128.Create((ushort)'+');
                var vecSpace = Vector128.Create((ushort)' ');
                do
                {
                    var vec = Vector128.Load(input + i);
                    var mask = Vector128.Equals(vec, vecPlus);

                    // Take the space where the comparison matched, the original element elsewhere.
                    var res = Vector128.ConditionalSelect(mask, vecSpace, vec);
                    res.Store(output + i);
                    i += Vector128<ushort>.Count;
                } while (i <= n - Vector128<ushort>.Count);
            }

            // Scalar tail (or the whole input when the vector path was not taken).
            for (; i < n; ++i)
            {
                var current = input[i];
                output[i] = current == '+' ? (ushort)' ' : current;
            }
        }
    }
}
/// <summary>
/// Vector256 implementation: replaces '+' with ' ' on 256-bit blocks first, then on 128-bit blocks,
/// and finishes with a scalar loop for the remaining elements.
/// </summary>
public static class Vector256Helper
{
    // Cached delegate so string.Create does not need a new delegate instance per call.
    private static readonly SpanAction<char, IntPtr> s_replacePlusWithSpace = ReplacePlusWithSpaceCore;

    /// <summary>Returns a new string equal to <paramref name="span"/> with each '+' turned into a space.</summary>
    /// <param name="span">Characters to copy.</param>
    /// <returns>A string of the same length as <paramref name="span"/>.</returns>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static unsafe string ReplacePlusWithSpace(ReadOnlySpan<char> span)
    {
        // The source stays pinned for the whole string.Create call; its address travels as the state argument.
        fixed (char* source = &MemoryMarshal.GetReference(span))
        {
            return string.Create(span.Length, (IntPtr)source, s_replacePlusWithSpace);
        }
    }

    /// <summary>Fills <paramref name="buffer"/> from the source pointer carried in <paramref name="state"/>.</summary>
    private static unsafe void ReplacePlusWithSpaceCore(Span<char> buffer, IntPtr state)
    {
        fixed (char* destination = &MemoryMarshal.GetReference(buffer))
        {
            var src = (ushort*)state.ToPointer();
            var dst = (ushort*)destination;
            nint length = (nint)(uint)buffer.Length;
            nint index = 0;

            // Stage 1: 256-bit blocks.
            if (Vector256.IsHardwareAccelerated && length >= Vector256<ushort>.Count)
            {
                var plus = Vector256.Create((ushort)'+');
                var space = Vector256.Create((ushort)' ');
                nint lastBlockStart = length - Vector256<ushort>.Count;
                do
                {
                    var block = Vector256.Load(src + index);
                    var isPlus = Vector256.Equals(block, plus);
                    Vector256.ConditionalSelect(isPlus, space, block).Store(dst + index);
                    index += Vector256<ushort>.Count;
                }
                while (index <= lastBlockStart);
            }

            // Stage 2: 128-bit blocks for what is left after stage 1.
            nint remaining = length - index;
            if (Vector128.IsHardwareAccelerated && remaining >= Vector128<ushort>.Count)
            {
                var plus = Vector128.Create((ushort)'+');
                var space = Vector128.Create((ushort)' ');
                nint lastBlockStart = length - Vector128<ushort>.Count;
                do
                {
                    var block = Vector128.Load(src + index);
                    var isPlus = Vector128.Equals(block, plus);
                    Vector128.ConditionalSelect(isPlus, space, block).Store(dst + index);
                    index += Vector128<ushort>.Count;
                }
                while (index <= lastBlockStart);
            }

            // Stage 3: scalar tail.
            while (index < length)
            {
                var current = src[index];
                dst[index] = current == '+' ? (ushort)' ' : current;
                index++;
            }
        }
    }
}
BenchmarkDotNet=v0.13.1, OS=Windows 10.0.22621
AMD Ryzen 7 5800X, 1 CPU, 16 logical and 8 physical cores
.NET SDK=7.0.100-preview.4.22252.9
  [Host]    : .NET 7.0.0 (7.0.22.22904), X64 RyuJIT
  RyuJitX64 : .NET 7.0.0 (7.0.22.30504), X64 RyuJIT

Job=RyuJitX64  Jit=RyuJit  Platform=X64  
Toolchain=.NET 7.0.100-preview.6.22307.20  
Method Value Mean Error StdDev Median Ratio RatioSD Code Size Gen 0 Gen 1 Allocated
Current **** 1.317 ns 0.0165 ns 0.0138 ns 1.319 ns 1.00 0.00 268 B - - -
Vector128 1.379 ns 0.0266 ns 0.0208 ns 1.375 ns 1.05 0.03 268 B - - -
Vector256 1.310 ns 0.0389 ns 0.0364 ns 1.294 ns 1.00 0.03 268 B - - -
Current +aa 6.731 ns 0.1773 ns 0.3106 ns 6.705 ns 1.00 0.00 268 B 0.0019 - 32 B
Vector128 +aa 6.698 ns 0.1804 ns 0.2644 ns 6.645 ns 1.00 0.06 268 B 0.0019 - 32 B
Vector256 +aa 6.723 ns 0.1790 ns 0.1915 ns 6.734 ns 1.01 0.06 268 B 0.0019 - 32 B
Current +aaaa+ 8.237 ns 0.1365 ns 0.1139 ns 8.214 ns 1.00 0.00 268 B 0.0024 - 40 B
Vector128 +aaaa+ 8.109 ns 0.1052 ns 0.0933 ns 8.114 ns 0.99 0.02 268 B 0.0024 - 40 B
Vector256 +aaaa+ 8.713 ns 0.2150 ns 0.3765 ns 8.721 ns 1.07 0.05 268 B 0.0024 - 40 B
Current +aaaa+aa 5.837 ns 0.1348 ns 0.1125 ns 5.808 ns 1.00 0.00 268 B 0.0024 - 40 B
Vector128 +aaaa+aa 5.711 ns 0.1591 ns 0.2068 ns 5.698 ns 0.98 0.05 268 B 0.0024 - 40 B
Vector256 +aaaa+aa 5.875 ns 0.0769 ns 0.0719 ns 5.878 ns 1.01 0.02 268 B 0.0024 - 40 B
Current +aaaa+aaa 5.998 ns 0.1526 ns 0.3568 ns 5.842 ns 1.00 0.00 268 B 0.0024 - 40 B
Vector128 +aaaa+aaa 5.775 ns 0.1323 ns 0.1238 ns 5.778 ns 0.91 0.05 268 B 0.0024 - 40 B
Vector256 +aaaa+aaa 6.034 ns 0.1181 ns 0.1047 ns 6.032 ns 0.95 0.07 268 B 0.0024 - 40 B
Current +aaaa+aaaa+a 7.784 ns 0.1768 ns 0.1654 ns 7.804 ns 1.00 0.00 268 B 0.0029 - 48 B
Vector128 +aaaa+aaaa+a 7.667 ns 0.1610 ns 0.1427 ns 7.639 ns 0.99 0.03 268 B 0.0029 - 48 B
Vector256 +aaaa+aaaa+a 7.859 ns 0.1180 ns 0.0921 ns 7.879 ns 1.01 0.02 268 B 0.0029 - 48 B
Current +aaaa+aaaa+aaaa 9.351 ns 0.2320 ns 0.3252 ns 9.347 ns 1.00 0.00 268 B 0.0033 - 56 B
Vector128 +aaaa+aaaa+aaaa 9.617 ns 0.2375 ns 0.5409 ns 9.608 ns 1.07 0.07 268 B 0.0033 - 56 B
Vector256 +aaaa+aaaa+aaaa 9.842 ns 0.2407 ns 0.5077 ns 9.868 ns 1.08 0.06 268 B 0.0033 - 56 B
Current +aaaa+aaaa+aaaa+ 6.934 ns 0.1839 ns 0.4335 ns 6.952 ns 1.00 0.00 268 B 0.0033 - 56 B
Vector128 +aaaa+aaaa+aaaa+ 6.356 ns 0.1703 ns 0.1749 ns 6.329 ns 0.97 0.05 268 B 0.0033 - 56 B
Vector256 +aaaa+aaaa+aaaa+ 6.093 ns 0.1166 ns 0.1033 ns 6.106 ns 0.93 0.05 268 B 0.0033 - 56 B
Current +aaaa+aaaa+aaaa+aa 6.991 ns 0.1710 ns 0.1599 ns 6.935 ns 1.00 0.00 268 B 0.0038 - 64 B
Vector128 +aaaa+aaaa+aaaa+aa 6.927 ns 0.1248 ns 0.1106 ns 6.922 ns 0.99 0.02 268 B 0.0038 - 64 B
Vector256 +aaaa+aaaa+aaaa+aa 6.686 ns 0.0994 ns 0.0881 ns 6.668 ns 0.96 0.02 268 B 0.0038 - 64 B
Current +aaaa(...)aaaa+ [21] 8.598 ns 0.2152 ns 0.2392 ns 8.574 ns 1.00 0.00 268 B 0.0038 - 64 B
Vector128 +aaaa(...)aaaa+ [21] 8.152 ns 0.1098 ns 0.0973 ns 8.135 ns 0.96 0.02 268 B 0.0038 - 64 B
Vector256 +aaaa(...)aaaa+ [21] 8.179 ns 0.0743 ns 0.0659 ns 8.177 ns 0.96 0.03 268 B 0.0038 - 64 B
Current +aaaa(...)a+aaa [24] 7.728 ns 0.1500 ns 0.1330 ns 7.735 ns 1.00 0.00 268 B 0.0043 - 72 B
Vector128 +aaaa(...)a+aaa [24] 7.360 ns 0.1153 ns 0.1022 ns 7.391 ns 0.95 0.02 268 B 0.0043 - 72 B
Vector256 +aaaa(...)a+aaa [24] 6.574 ns 0.0694 ns 0.0615 ns 6.581 ns 0.85 0.02 268 B 0.0043 - 72 B
Current +aaaa(...)aaa+a [27] 8.214 ns 0.1025 ns 0.0856 ns 8.241 ns 1.00 0.00 268 B 0.0048 - 80 B
Vector128 +aaaa(...)aaa+a [27] 8.175 ns 0.1227 ns 0.1088 ns 8.194 ns 1.00 0.01 268 B 0.0048 - 80 B
Vector256 +aaaa(...)aaa+a [27] 8.225 ns 0.1926 ns 0.1802 ns 8.230 ns 1.00 0.03 268 B 0.0048 - 80 B
Current +aaaa(...)+aaaa [30] 10.011 ns 0.2441 ns 0.6724 ns 9.770 ns 1.00 0.00 268 B 0.0053 - 88 B
Vector128 +aaaa(...)+aaaa [30] 10.134 ns 0.2466 ns 0.5309 ns 9.963 ns 0.99 0.08 268 B 0.0053 - 88 B
Vector256 +aaaa(...)+aaaa [30] 9.812 ns 0.2400 ns 0.5513 ns 9.594 ns 0.97 0.08 268 B 0.0053 - 88 B
Current +aaaa(...)aaa+a [32] 8.690 ns 0.2142 ns 0.2292 ns 8.622 ns 1.00 0.00 268 B 0.0053 - 88 B
Vector128 +aaaa(...)aaa+a [32] 8.404 ns 0.1444 ns 0.1350 ns 8.409 ns 0.97 0.03 268 B 0.0053 - 88 B
Vector256 +aaaa(...)aaa+a [32] 7.214 ns 0.1387 ns 0.1158 ns 7.208 ns 0.83 0.02 268 B 0.0053 - 88 B
Current +aaaa(...)aa+aa [33] 9.056 ns 0.2239 ns 0.5009 ns 8.924 ns 1.00 0.00 268 B 0.0053 - 88 B
Vector128 +aaaa(...)aa+aa [33] 8.357 ns 0.0891 ns 0.0744 ns 8.352 ns 0.87 0.04 268 B 0.0053 - 88 B
Vector256 +aaaa(...)aa+aa [33] 7.835 ns 0.2010 ns 0.4496 ns 7.673 ns 0.87 0.07 268 B 0.0053 - 88 B
Current +aaaa(...)aaaa+ [36] 9.222 ns 0.1996 ns 0.1770 ns 9.249 ns 1.00 0.00 268 B 0.0057 - 96 B
Vector128 +aaaa(...)aaaa+ [36] 9.787 ns 0.2401 ns 0.6572 ns 9.631 ns 1.05 0.09 268 B 0.0057 - 96 B
Vector256 +aaaa(...)aaaa+ [36] 8.698 ns 0.0746 ns 0.0661 ns 8.711 ns 0.94 0.02 268 B 0.0057 - 96 B
Current +aaaa(...)a+aaa [39] 10.662 ns 0.1480 ns 0.1384 ns 10.681 ns 1.00 0.00 268 B 0.0062 - 104 B
Vector128 +aaaa(...)a+aaa [39] 10.545 ns 0.1155 ns 0.0965 ns 10.537 ns 0.99 0.01 268 B 0.0062 - 104 B
Vector256 +aaaa(...)a+aaa [39] 10.534 ns 0.2004 ns 0.1777 ns 10.492 ns 0.99 0.03 268 B 0.0062 - 104 B
Current +aaaa(...)+aaaa [40] 9.285 ns 0.1520 ns 0.1269 ns 9.256 ns 1.00 0.00 268 B 0.0062 - 104 B
Vector128 +aaaa(...)+aaaa [40] 9.311 ns 0.2304 ns 0.2042 ns 9.249 ns 1.00 0.02 268 B 0.0062 - 104 B
Vector256 +aaaa(...)+aaaa [40] 8.262 ns 0.2011 ns 0.1975 ns 8.216 ns 0.89 0.02 268 B 0.0062 - 104 B
Current +aaaa(...)aaa+a [42] 9.764 ns 0.2419 ns 0.3060 ns 9.703 ns 1.00 0.00 268 B 0.0067 - 112 B
Vector128 +aaaa(...)aaa+a [42] 9.771 ns 0.2403 ns 0.3596 ns 9.816 ns 1.00 0.05 268 B 0.0067 - 112 B
Vector256 +aaaa(...)aaa+a [42] 8.852 ns 0.0740 ns 0.0656 ns 8.866 ns 0.90 0.03 268 B 0.0067 - 112 B
Current +aaaa(...)+aaaa [45] 10.367 ns 0.2522 ns 0.3452 ns 10.257 ns 1.00 0.00 268 B 0.0067 - 112 B
Vector128 +aaaa(...)+aaaa [45] 10.840 ns 0.2087 ns 0.1952 ns 10.784 ns 1.04 0.04 268 B 0.0067 - 112 B
Vector256 +aaaa(...)+aaaa [45] 9.644 ns 0.1184 ns 0.0988 ns 9.632 ns 0.92 0.03 268 B 0.0067 - 112 B
Current +aaaa(...)aa+aa [48] 10.099 ns 0.1632 ns 0.1446 ns 10.040 ns 1.00 0.00 268 B 0.0072 - 120 B
Vector128 +aaaa(...)aa+aa [48] 10.176 ns 0.2098 ns 0.1860 ns 10.116 ns 1.01 0.03 268 B 0.0072 - 120 B
Vector256 +aaaa(...)aa+aa [48] 8.670 ns 0.2062 ns 0.1929 ns 8.651 ns 0.86 0.02 268 B 0.0072 - 120 B
Current +aaaa(...)aaaa+ [51] 10.752 ns 0.2346 ns 0.2195 ns 10.669 ns 1.00 0.00 268 B 0.0076 - 128 B
Vector128 +aaaa(...)aaaa+ [51] 10.453 ns 0.2530 ns 0.4296 ns 10.452 ns 0.99 0.04 268 B 0.0076 - 128 B
Vector256 +aaaa(...)aaaa+ [51] 10.028 ns 0.2425 ns 0.4954 ns 9.946 ns 0.93 0.06 268 B 0.0076 - 128 B
Current +aaaa(...)a+aaa [54] 11.965 ns 0.2794 ns 0.4744 ns 11.923 ns 1.00 0.00 268 B 0.0081 - 136 B
Vector128 +aaaa(...)a+aaa [54] 12.079 ns 0.2844 ns 0.5546 ns 11.907 ns 1.02 0.06 268 B 0.0081 - 136 B
Vector256 +aaaa(...)a+aaa [54] 11.446 ns 0.2696 ns 0.6861 ns 11.245 ns 0.97 0.07 268 B 0.0081 - 136 B
Current +aaaa(...)aaaa+ [56] 10.735 ns 0.2532 ns 0.2601 ns 10.648 ns 1.00 0.00 268 B 0.0081 - 136 B
Vector128 +aaaa(...)aaaa+ [56] 11.315 ns 0.2751 ns 0.7936 ns 11.012 ns 1.18 0.04 268 B 0.0081 - 136 B
Vector256 +aaaa(...)aaaa+ [56] 9.749 ns 0.2228 ns 0.3783 ns 9.640 ns 0.92 0.03 268 B 0.0081 - 136 B
Current +aaaa(...)aaa+a [57] 10.860 ns 0.1088 ns 0.0965 ns 10.840 ns 1.00 0.00 268 B 0.0081 - 136 B
Vector128 +aaaa(...)aaa+a [57] 11.114 ns 0.2534 ns 0.2919 ns 11.131 ns 1.02 0.03 268 B 0.0081 - 136 B
Vector256 +aaaa(...)aaa+a [57] 9.772 ns 0.0717 ns 0.0636 ns 9.784 ns 0.90 0.01 268 B 0.0081 - 136 B
Current +aaaa(...)+aaaa [60] 11.475 ns 0.2683 ns 0.2509 ns 11.409 ns 1.00 0.00 268 B 0.0086 - 144 B
Vector128 +aaaa(...)+aaaa [60] 11.387 ns 0.2435 ns 0.2277 ns 11.316 ns 0.99 0.03 268 B 0.0086 - 144 B
Vector256 +aaaa(...)+aaaa [60] 11.662 ns 0.2776 ns 0.4934 ns 11.603 ns 1.02 0.05 268 B 0.0086 - 144 B
Current +aaaa(...)aa+aa [63] 13.621 ns 0.3160 ns 0.4919 ns 13.662 ns 1.00 0.00 268 B 0.0091 - 152 B
Vector128 +aaaa(...)aa+aa [63] 13.656 ns 0.3106 ns 0.4836 ns 13.620 ns 1.00 0.04 268 B 0.0091 - 152 B
Vector256 +aaaa(...)aa+aa [63] 12.873 ns 0.2998 ns 0.4394 ns 12.912 ns 0.94 0.04 268 B 0.0091 - 152 B
Current +aaaa(...)a+aaa [64] 12.709 ns 0.2938 ns 0.4988 ns 12.636 ns 1.00 0.00 268 B 0.0091 - 152 B
Vector128 +aaaa(...)a+aaa [64] 12.535 ns 0.2941 ns 0.5666 ns 12.575 ns 0.99 0.06 268 B 0.0091 - 152 B
Vector256 +aaaa(...)a+aaa [64] 11.161 ns 0.2590 ns 0.3714 ns 11.147 ns 0.88 0.04 268 B 0.0091 - 152 B
Current +aaaa(...)aaaa+ [66] 13.500 ns 0.3160 ns 0.8597 ns 13.387 ns 1.00 0.00 268 B 0.0095 - 160 B
Vector128 +aaaa(...)aaaa+ [66] 12.992 ns 0.2799 ns 0.5589 ns 12.933 ns 0.95 0.07 268 B 0.0095 - 160 B
Vector256 +aaaa(...)aaaa+ [66] 11.205 ns 0.2691 ns 0.4345 ns 11.205 ns 0.81 0.06 268 B 0.0096 - 160 B
Current +aaaa(...)a+aaa [69] 13.630 ns 0.3179 ns 0.5398 ns 13.564 ns 1.00 0.00 268 B 0.0095 - 160 B
Vector128 +aaaa(...)a+aaa [69] 13.717 ns 0.3157 ns 0.5188 ns 13.736 ns 1.01 0.06 268 B 0.0095 - 160 B
Vector256 +aaaa(...)a+aaa [69] 12.319 ns 0.2923 ns 0.6833 ns 12.146 ns 0.92 0.06 268 B 0.0096 - 160 B
Current +aaaa(...)aaa+a [72] 12.244 ns 0.2777 ns 0.3611 ns 12.113 ns 1.00 0.00 268 B 0.0100 - 168 B
Vector128 +aaaa(...)aaa+a [72] 12.490 ns 0.2614 ns 0.2567 ns 12.484 ns 1.01 0.04 268 B 0.0100 - 168 B
Vector256 +aaaa(...)aaa+a [72] 11.086 ns 0.2530 ns 0.2366 ns 11.091 ns 0.90 0.04 268 B 0.0100 - 168 B
Current +aaaa(...)+aaaa [75] 13.882 ns 0.3074 ns 0.7597 ns 13.716 ns 1.00 0.00 268 B 0.0105 - 176 B
Vector128 +aaaa(...)+aaaa [75] 12.934 ns 0.3003 ns 0.4209 ns 12.752 ns 0.93 0.06 268 B 0.0105 - 176 B
Vector256 +aaaa(...)+aaaa [75] 11.234 ns 0.1232 ns 0.1092 ns 11.252 ns 0.81 0.05 268 B 0.0105 - 176 B
Current +aaaa(...)aa+aa [78] 13.607 ns 0.1608 ns 0.1342 ns 13.574 ns 1.00 0.00 268 B 0.0110 - 184 B
Vector128 +aaaa(...)aa+aa [78] 13.795 ns 0.2030 ns 0.1799 ns 13.738 ns 1.01 0.01 268 B 0.0110 - 184 B
Vector256 +aaaa(...)aa+aa [78] 12.569 ns 0.1045 ns 0.0816 ns 12.573 ns 0.93 0.01 268 B 0.0110 - 184 B
Current +aaaa(...)+aaaa [80] 13.036 ns 0.1234 ns 0.1094 ns 12.996 ns 1.00 0.00 268 B 0.0110 - 184 B
Vector128 +aaaa(...)+aaaa [80] 13.307 ns 0.3094 ns 0.6458 ns 13.083 ns 1.00 0.05 268 B 0.0110 - 184 B
Vector256 +aaaa(...)+aaaa [80] 11.540 ns 0.2750 ns 0.3167 ns 11.409 ns 0.89 0.03 268 B 0.0110 - 184 B
Current +aaaa(...)aaaa+ [81] 12.631 ns 0.1100 ns 0.0975 ns 12.615 ns 1.00 0.00 268 B 0.0110 - 184 B
Vector128 +aaaa(...)aaaa+ [81] 12.653 ns 0.1733 ns 0.1353 ns 12.678 ns 1.00 0.01 268 B 0.0110 - 184 B
Vector256 +aaaa(...)aaaa+ [81] 11.787 ns 0.2366 ns 0.2098 ns 11.788 ns 0.93 0.01 268 B 0.0110 - 184 B
Current +aaaa(...)a+aaa [84] 14.024 ns 0.3248 ns 0.5426 ns 13.801 ns 1.00 0.00 268 B 0.0115 - 192 B
Vector128 +aaaa(...)a+aaa [84] 13.521 ns 0.1766 ns 0.1566 ns 13.523 ns 0.96 0.04 268 B 0.0115 - 192 B
Vector256 +aaaa(...)a+aaa [84] 12.507 ns 0.2648 ns 0.3050 ns 12.453 ns 0.90 0.03 268 B 0.0115 - 192 B
Current +aaaa(...)aaa+a [87] 15.462 ns 0.3287 ns 0.6714 ns 15.217 ns 1.00 0.00 268 B 0.0120 - 200 B
Vector128 +aaaa(...)aaa+a [87] 14.881 ns 0.3351 ns 0.5218 ns 14.716 ns 0.97 0.06 268 B 0.0120 - 200 B
Vector256 +aaaa(...)aaa+a [87] 13.941 ns 0.3229 ns 0.5483 ns 13.715 ns 0.90 0.05 268 B 0.0120 - 200 B
Current +aaaa(...)aa+aa [88] 13.809 ns 0.2364 ns 0.2095 ns 13.787 ns 1.00 0.00 268 B 0.0120 - 200 B
Vector128 +aaaa(...)aa+aa [88] 14.332 ns 0.3311 ns 0.3813 ns 14.330 ns 1.04 0.04 268 B 0.0120 - 200 B
Vector256 +aaaa(...)aa+aa [88] 11.981 ns 0.2404 ns 0.2132 ns 11.937 ns 0.87 0.02 268 B 0.0120 - 200 B
Current +aaaa(...)+aaaa [90] 14.149 ns 0.3236 ns 0.4537 ns 14.120 ns 1.00 0.00 268 B 0.0124 - 208 B
Vector128 +aaaa(...)+aaaa [90] 14.019 ns 0.2680 ns 0.2238 ns 14.052 ns 1.00 0.03 268 B 0.0124 - 208 B
Vector256 +aaaa(...)+aaaa [90] 12.778 ns 0.2307 ns 0.1926 ns 12.788 ns 0.91 0.04 268 B 0.0124 - 208 B
Current +aaaa(...)aa+aa [93] 15.313 ns 0.3471 ns 0.3997 ns 15.264 ns 1.00 0.00 268 B 0.0124 - 208 B
Vector128 +aaaa(...)aa+aa [93] 14.595 ns 0.1778 ns 0.1576 ns 14.586 ns 0.95 0.03 268 B 0.0124 - 208 B
Vector256 +aaaa(...)aa+aa [93] 13.446 ns 0.3100 ns 0.2899 ns 13.340 ns 0.88 0.02 268 B 0.0124 - 208 B
Current +aaaa(...)aaaa+ [96] 14.415 ns 0.2375 ns 0.2221 ns 14.446 ns 1.00 0.00 268 B 0.0129 - 216 B
Vector128 +aaaa(...)aaaa+ [96] 15.563 ns 0.3515 ns 0.8948 ns 15.393 ns 1.07 0.07 268 B 0.0129 - 216 B
Vector256 +aaaa(...)aaaa+ [96] 13.138 ns 0.2274 ns 0.2016 ns 13.221 ns 0.91 0.02 268 B 0.0129 - 216 B
Current +aaaa(...)a+aaa [99] 15.288 ns 0.3082 ns 0.2883 ns 15.330 ns 1.00 0.00 268 B 0.0134 - 224 B
Vector128 +aaaa(...)a+aaa [99] 15.227 ns 0.1411 ns 0.1251 ns 15.263 ns 0.99 0.02 268 B 0.0134 - 224 B
Vector256 +aaaa(...)a+aaa [99] 14.198 ns 0.3299 ns 0.7515 ns 14.149 ns 0.96 0.04 268 B 0.0134 - 224 B
Current +aaa(...)+aaa [104] 17.024 ns 0.5917 ns 1.7259 ns 16.576 ns 1.00 0.00 268 B 0.0139 - 232 B
Vector128 +aaa(...)+aaa [104] 17.033 ns 0.3829 ns 0.8643 ns 16.957 ns 1.03 0.13 268 B 0.0139 - 232 B
Vector256 +aaa(...)+aaa [104] 15.328 ns 0.4278 ns 1.2065 ns 15.030 ns 0.91 0.10 268 B 0.0139 - 232 B
Current +aaa(...)aa+a [112] 18.593 ns 0.4188 ns 0.6997 ns 18.468 ns 1.00 0.00 268 B 0.0148 - 248 B
Vector128 +aaa(...)aa+a [112] 18.209 ns 0.4378 ns 1.2492 ns 17.997 ns 0.95 0.05 268 B 0.0148 - 248 B
Vector256 +aaa(...)aa+a [112] 16.548 ns 0.4382 ns 1.2501 ns 16.147 ns 0.88 0.07 268 B 0.0148 - 248 B
Current +aaa(...)aaaa [120] 19.267 ns 0.4327 ns 1.1625 ns 19.324 ns 1.00 0.00 268 B 0.0158 - 264 B
Vector128 +aaa(...)aaaa [120] 19.518 ns 0.6646 ns 1.8636 ns 19.104 ns 1.02 0.12 268 B 0.0158 - 264 B
Vector256 +aaa(...)aaaa [120] 16.689 ns 0.3820 ns 1.0328 ns 16.699 ns 0.87 0.06 268 B 0.0158 - 264 B
Current +aaa(...)a+aa [128] 21.188 ns 0.4655 ns 1.1678 ns 21.279 ns 1.00 0.00 268 B 0.0167 - 280 B
Vector128 +aaa(...)a+aa [128] 20.310 ns 0.4527 ns 1.0126 ns 20.268 ns 0.96 0.07 268 B 0.0167 - 280 B
Vector256 +aaa(...)a+aa [128] 17.619 ns 0.4745 ns 1.3382 ns 17.458 ns 0.83 0.07 268 B 0.0167 - 280 B
Current +aaa(...)aaa+ [136] 21.304 ns 0.4751 ns 1.2007 ns 21.244 ns 1.00 0.00 268 B 0.0176 - 296 B
Vector128 +aaa(...)aaa+ [136] 20.424 ns 0.4542 ns 1.2735 ns 20.043 ns 0.97 0.08 268 B 0.0177 - 296 B
Vector256 +aaa(...)aaa+ [136] 18.526 ns 0.4387 ns 1.2586 ns 18.413 ns 0.88 0.08 268 B 0.0177 - 296 B
Current +aaa(...)+aaa [144] 21.509 ns 0.4735 ns 1.1965 ns 21.425 ns 1.00 0.00 268 B 0.0186 - 312 B
Vector128 +aaa(...)+aaa [144] 21.998 ns 0.4844 ns 1.4054 ns 21.849 ns 1.03 0.09 268 B 0.0186 - 312 B
Vector256 +aaa(...)+aaa [144] 18.403 ns 0.4131 ns 1.1446 ns 18.348 ns 0.86 0.06 268 B 0.0186 - 312 B
Current +aaa(...)aa+a [152] 22.317 ns 0.5075 ns 1.4724 ns 22.046 ns 1.00 0.00 268 B 0.0196 - 328 B
Vector128 +aaa(...)aa+a [152] 22.100 ns 0.4911 ns 1.1186 ns 22.059 ns 1.00 0.09 268 B 0.0196 - 328 B
Vector256 +aaa(...)aa+a [152] 18.878 ns 0.4248 ns 1.1484 ns 18.938 ns 0.85 0.07 268 B 0.0196 - 328 B
Current +aaa(...)aaaa [160] 23.005 ns 0.5084 ns 1.1784 ns 23.027 ns 1.00 0.00 268 B 0.0206 - 344 B
Vector128 +aaa(...)aaaa [160] 23.755 ns 0.5216 ns 1.1667 ns 23.714 ns 1.03 0.08 268 B 0.0206 - 344 B
Vector256 +aaa(...)aaaa [160] 19.209 ns 0.4288 ns 0.8856 ns 19.240 ns 0.84 0.07 268 B 0.0206 - 344 B
Current +aaa(...)a+aa [168] 24.510 ns 0.5378 ns 1.4354 ns 24.685 ns 1.00 0.00 268 B 0.0215 0.0000 360 B
Vector128 +aaa(...)a+aa [168] 24.648 ns 0.6216 ns 1.7633 ns 24.336 ns 1.01 0.09 268 B 0.0215 0.0000 360 B
Vector256 +aaa(...)a+aa [168] 21.353 ns 0.5572 ns 1.6077 ns 20.850 ns 0.87 0.08 268 B 0.0215 0.0000 360 B
Current +aaa(...)aaa+ [176] 24.522 ns 0.5384 ns 1.1357 ns 24.484 ns 1.00 0.00 268 B 0.0225 0.0000 376 B
Vector128 +aaa(...)aaa+ [176] 24.146 ns 0.4916 ns 0.9471 ns 24.111 ns 0.99 0.05 268 B 0.0225 0.0000 376 B
Vector256 +aaa(...)aaa+ [176] 20.777 ns 0.4613 ns 1.1315 ns 20.931 ns 0.84 0.06 268 B 0.0225 0.0000 376 B
Current +aaa(...)+aaa [184] 26.085 ns 0.5695 ns 1.6341 ns 25.920 ns 1.00 0.00 268 B 0.0234 0.0000 392 B
Vector128 +aaa(...)+aaa [184] 26.304 ns 0.5659 ns 1.3002 ns 26.151 ns 1.02 0.07 268 B 0.0234 0.0000 392 B
Vector256 +aaa(...)+aaa [184] 21.886 ns 0.5015 ns 1.3979 ns 21.747 ns 0.84 0.08 268 B 0.0234 0.0000 392 B
Current +aaa(...)aa+a [192] 28.742 ns 0.9242 ns 2.6958 ns 27.904 ns 1.00 0.00 268 B 0.0244 0.0000 408 B
Vector128 +aaa(...)aa+a [192] 27.654 ns 0.6941 ns 1.9916 ns 27.324 ns 0.97 0.10 268 B 0.0244 0.0000 408 B
Vector256 +aaa(...)aa+a [192] 22.258 ns 0.4957 ns 1.3653 ns 22.107 ns 0.78 0.08 268 B 0.0244 0.0000 408 B
Current +aaa(...)aaaa [200] 29.885 ns 1.0554 ns 3.0786 ns 29.443 ns 1.00 0.00 268 B 0.0253 - 424 B
Vector128 +aaa(...)aaaa [200] 28.064 ns 0.6095 ns 1.7585 ns 27.722 ns 0.95 0.12 268 B 0.0253 0.0000 424 B
Vector256 +aaa(...)aaaa [200] 22.789 ns 0.4998 ns 1.2902 ns 22.664 ns 0.78 0.07 268 B 0.0253 0.0000 424 B
Current +aaa(...)a+aa [208] 29.526 ns 0.6345 ns 1.2224 ns 29.609 ns 1.00 0.00 268 B 0.0263 - 440 B
Vector128 +aaa(...)a+aa [208] 28.778 ns 0.6219 ns 1.4901 ns 28.653 ns 0.97 0.06 268 B 0.0263 - 440 B
Vector256 +aaa(...)a+aa [208] 23.159 ns 0.5035 ns 0.8273 ns 23.458 ns 0.79 0.04 268 B 0.0263 0.0000 440 B
Current +aaa(...)aaa+ [216] 31.064 ns 0.7214 ns 2.0583 ns 30.759 ns 1.00 0.00 268 B 0.0272 - 456 B
Vector128 +aaa(...)aaa+ [216] 33.746 ns 0.9434 ns 2.7670 ns 33.670 ns 1.09 0.11 268 B 0.0272 - 456 B
Vector256 +aaa(...)aaa+ [216] 24.240 ns 0.5350 ns 1.5177 ns 24.163 ns 0.78 0.08 268 B 0.0272 0.0000 456 B
Current +aaa(...)+aaa [224] 29.885 ns 0.6492 ns 1.7661 ns 29.748 ns 1.00 0.00 268 B 0.0282 - 472 B
Vector128 +aaa(...)+aaa [224] 32.060 ns 0.6837 ns 1.6772 ns 32.302 ns 1.08 0.09 268 B 0.0282 - 472 B
Vector256 +aaa(...)+aaa [224] 26.376 ns 0.9178 ns 2.6626 ns 26.089 ns 0.90 0.11 268 B 0.0281 - 472 B
Current +aaa(...)aa+a [232] 30.928 ns 0.6637 ns 1.8279 ns 30.508 ns 1.00 0.00 268 B 0.0291 - 488 B
Vector128 +aaa(...)aa+a [232] 32.001 ns 0.6846 ns 2.0078 ns 31.669 ns 1.04 0.09 268 B 0.0291 - 488 B
Vector256 +aaa(...)aa+a [232] 26.456 ns 0.5799 ns 1.1715 ns 26.268 ns 0.85 0.07 268 B 0.0291 - 488 B
Current +aaa(...)aaaa [240] 32.413 ns 0.6928 ns 1.8128 ns 32.347 ns 1.00 0.00 268 B 0.0301 - 504 B
Vector128 +aaa(...)aaaa [240] 34.191 ns 0.7350 ns 1.6287 ns 33.978 ns 1.06 0.08 268 B 0.0301 - 504 B
Vector256 +aaa(...)aaaa [240] 25.143 ns 0.7002 ns 2.0534 ns 24.541 ns 0.79 0.08 268 B 0.0301 0.0000 504 B
Current +aaa(...)a+aa [248] 30.748 ns 0.5971 ns 0.5586 ns 30.750 ns 1.00 0.00 268 B 0.0311 0.0001 520 B
Vector128 +aaa(...)a+aa [248] 30.368 ns 0.6346 ns 0.8686 ns 30.275 ns 0.99 0.03 268 B 0.0311 0.0001 520 B
Vector256 +aaa(...)a+aa [248] 24.939 ns 0.5433 ns 0.8458 ns 25.057 ns 0.81 0.03 268 B 0.0311 0.0001 520 B
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment