Skip to content

Instantly share code, notes, and snippets.

@sebfischer83
Last active March 27, 2021 17:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sebfischer83/35793f0ece2728738dbc44532c2210d0 to your computer and use it in GitHub Desktop.
Save sebfischer83/35793f0ece2728738dbc44532c2210d0 to your computer and use it in GitHub Desktop.
Benchmark
using System;
using System.Collections.Generic;
using System.Collections.Specialized;
using System.Linq;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Diagnosers;
using BenchmarkDotNet.Diagnostics.Windows.Configs;
using BenchmarkDotNet.Order;
namespace Datatent2.AlgoBench.Find
{
[HtmlExporter, CsvExporter(), CsvMeasurementsExporter(), MarkdownExporter,
MeanColumn, MedianColumn, MediumRunJob, BaselineColumn]
public class FindFirstEmptyPageBench
{
[Params(1000)]
public int Iterations { get; set; }
byte[][] buffers;
[GlobalSetup]
public void Setup()
{
List<byte[]> list = new List<byte[]>(Iterations);
Random random = new Random();
for (int i = 0; i < Iterations; i++)
{
var l = random.Next(0, 8000);
var b = new byte[8128];
for (int j = 0; j < l; j++)
{
b[j] = 0xFF;
}
list.Add(b);
}
buffers = list.ToArray();
}
[Benchmark(Baseline = true)]
public long FindDefault()
{
long l = 0;
for (int i = 0; i < Iterations; i++)
{
l += FindFirstEmptyDefault(buffers[i]);
}
return l;
}
[Benchmark()]
public long FindUnrolled4()
{
long l = 0;
for (int i = 0; i < Iterations; i++)
{
l += FindFirstEmptyUnroll4(buffers[i]);
}
return l;
}
[Benchmark()]
public long FindUnrolled2()
{
long l = 0;
for (int i = 0; i < Iterations; i++)
{
l += FindFirstEmptyUnroll2(buffers[i]);
}
return l;
}
[Benchmark()]
public long FindUnrolled8()
{
long l = 0;
for (int i = 0; i < Iterations; i++)
{
l += FindFirstEmptyUnroll8(buffers[i]);
}
return l;
}
[Benchmark()]
public long FindSearchLong()
{
long l = 0;
for (int i = 0; i < Iterations; i++)
{
l += FindFirstSearchLong(buffers[i]);
}
return l;
}
[Benchmark()]
public long IndexOf()
{
long l = 0;
for (int i = 0; i < Iterations; i++)
{
l += FindFirstSearchLong2(buffers[i]);
}
return l;
}
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public int FindFirstSearchLong2(Span<byte> spanByte)
{
Span<ulong> span = MemoryMarshal.Cast<byte, ulong>(spanByte);
if (span[0] == 0)
return 1;
if (span[^1] == long.MaxValue)
return -1;
int index = -1;
var firstZero = span.IndexOf(0u);
if (firstZero > 0)
{
// check prev
if (span[firstZero - 1] == ulong.MaxValue)
index = firstZero;
else
index = firstZero - 1;
}
else
{
index = span.Length - 1;
}
if (index > -1)
{
int res = 0;
ref var l = ref span[index];
var count = BitOperations.LeadingZeroCount((ulong)l);
res = (64 - count) + 1;
if (index > 0 && res != -1)
res += (64 * index);
return res;
}
return index;
}
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public int FindFirstSearchLong(Span<byte> spanByte)
{
Span<ulong> span = MemoryMarshal.Cast<byte, ulong>(spanByte);
if (span[0] == 0)
return 1;
if (span[^1] == long.MaxValue)
return -1;
int min = 0;
int max = span.Length - 1;
int index = -1;
while (min <= max)
{
int mid = mid = (int)unchecked((uint)(min + max) >> 1);
ref var b = ref span[mid];
if (b != ulong.MaxValue)
{
if (mid == 0)
{
index = 0;
break;
}
ref var b1 = ref span[mid - 1];
if (b1 == ulong.MaxValue)
{
index = mid;
break;
}
max = mid - 1;
continue;
}
min = mid + 1;
}
if (index > -1)
{
int res = 0;
ref var l = ref span[index];
var count = BitOperations.LeadingZeroCount((ulong)l);
res = (64 - count) + 1;
if (index > 0 && res != -1)
res += (64 * index);
return res;
}
return index;
}
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public int FindFirstEmptyDefault(Span<byte> span)
{
Span<ulong> longSpan = MemoryMarshal.Cast<byte, ulong>(span);
int iterCount = longSpan.Length / 4;
for (int i = 0; i < iterCount; i++)
{
ref ulong l = ref longSpan[i];
if (l == ulong.MaxValue)
continue;
int res = 0;
var count = BitOperations.LeadingZeroCount(l);
res = (64 - count) + 1;
if (i > 0 && res != -1)
res += (64 * i);
return res;
}
return -1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public int FindFirstEmptyUnroll8(Span<byte> span)
{
Span<ulong> longSpan = MemoryMarshal.Cast<byte, ulong>(span);
var lastItem = longSpan[^1];
if (lastItem == ulong.MaxValue)
{
return -1;
}
int iterCount = longSpan.Length;
for (int i = 0; i < iterCount; i += 8)
{
ref ulong l8 = ref longSpan[i + 7];
// when l4 is max value all others before too
if (l8 == ulong.MaxValue)
continue;
ref ulong l1 = ref longSpan[i];
ref ulong l2 = ref longSpan[i + 1];
ref ulong l3 = ref longSpan[i + 2];
ref ulong l4 = ref longSpan[i + 3];
ref ulong l5 = ref longSpan[i + 4];
ref ulong l6 = ref longSpan[i + 5];
ref ulong l7 = ref longSpan[i + 6];
int mult = i + 1;
int res = -1;
if (l1 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l1);
res = (64 - count) + 1;
}
else if (l2 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l2);
res = (64) - count + 64 + 1;
}
else if (l3 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l3);
res = (64) - count + 128 + 1;
}
else if (l4 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l4);
res = (64) - count + 192 + 1;
}
else if (l5 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l4);
res = (64) - count + 256 + 1;
}
else if (l6 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l4);
res = (64) - count + 320 + 1;
}
else if (l7 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l4);
res = (64) - count + 384 + 1;
}
else if (l8 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l4);
res = (64) - count + 448 + 1;
}
if (i > 0 && res != -1)
res += (64 * i);
return res;
}
return -1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public int FindFirstEmptyUnroll4(Span<byte> span)
{
Span<ulong> longSpan = MemoryMarshal.Cast<byte, ulong>(span);
var lastItem = longSpan[^1];
if (lastItem == ulong.MaxValue)
{
return -1;
}
int iterCount = longSpan.Length;
for (int i = 0; i < iterCount; i += 4)
{
ref ulong l4 = ref longSpan[i + 3];
// when l4 is max value all others before too
if (l4 == ulong.MaxValue)
continue;
ref ulong l1 = ref longSpan[i];
ref ulong l2 = ref longSpan[i + 1];
ref ulong l3 = ref longSpan[i + 2];
int mult = i + 1;
int res = -1;
if (l1 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l1);
res = (64 - count) + 1;
}
else if (l2 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l2);
res = (64) - count + 64 + 1;
}
else if (l3 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l3);
res = (64) - count + 128 + 1;
}
else if (l4 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l4);
res = (64) - count + 192 + 1;
}
if (i > 0 && res != -1)
res += (64 * i);
return res;
}
return -1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
public int FindFirstEmptyUnroll2(Span<byte> span)
{
Span<ulong> longSpan = MemoryMarshal.Cast<byte, ulong>(span);
var lastItem = longSpan[^1];
if (lastItem == ulong.MaxValue)
{
return -1;
}
int iterCount = longSpan.Length;
for (int i = 0; i < iterCount; i += 2)
{
ref ulong l2 = ref longSpan[i + 1];
// when l4 is max value all others before too
if (l2 == ulong.MaxValue)
continue;
ref ulong l1 = ref longSpan[i];
int res = -1;
if (l1 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l1);
res = (64 - count) + 1;
}
else if (l2 != ulong.MaxValue)
{
var count = BitOperations.LeadingZeroCount(l2);
res = (64) - count + 64 + 1;
}
if (i > 0 && res != -1)
res += (64 * i);
return res;
}
return -1;
}
}
}
@sebfischer83
Copy link
Author

BenchmarkDotNet=v0.12.1, OS=Windows 10.0.19042
AMD Ryzen 7 3700X, 1 CPU, 16 logical and 8 physical cores
.NET Core SDK=5.0.201
  [Host]    : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT
  MediumRun : .NET Core 5.0.4 (CoreCLR 5.0.421.11614, CoreFX 5.0.421.11614), X64 RyuJIT

Job=MediumRun  IterationCount=15  LaunchCount=2  
WarmupCount=10  
Method Iterations Mean Error StdDev Median Ratio RatioSD Baseline
FindDefault 1000 127.31 μs 6.059 μs 8.495 μs 123.15 μs 1.00 0.00 Yes
FindUnrolled4 1000 98.44 μs 1.903 μs 2.849 μs 97.96 μs 0.78 0.04 No
FindUnrolled2 1000 153.55 μs 1.461 μs 2.095 μs 152.88 μs 1.21 0.07 No
FindUnrolled8 1000 74.44 μs 0.987 μs 1.447 μs 74.83 μs 0.59 0.03 No
FindSearchLong 1000 53.88 μs 0.525 μs 0.754 μs 53.87 μs 0.43 0.02 No
IndexOf 1000 131.67 μs 6.644 μs 9.529 μs 123.90 μs 1.04 0.14 No

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment