Skip to content

Instantly share code, notes, and snippets.

@Tornhoof
Last active December 23, 2017 15:29
Show Gist options
  • Save Tornhoof/a35f0bc652aad038556d22dcdd478ef7 to your computer and use it in GitHub Desktop.
Save Tornhoof/a35f0bc652aad038556d22dcdd478ef7 to your computer and use it in GitHub Desktop.
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Text;
using BenchmarkDotNet.Attributes;
namespace PopCountBenchmark
{
public class Benchmark
{
private static readonly ulong[] _Data = GenerateData();
private static ulong[] GenerateData()
{
var result = new ulong[1024];
var random = new Random(666);
for (int i = 0; i < result.Length; i++)
{
result[i] = (uint) random.Next() | ((ulong) random.Next() << 32);
}
return result;
}
/// <summary>
/// see https://en.wikipedia.org/wiki/Hamming_weight
/// </summary>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static long BitCount(ulong x)
{
x -= (x >> 1) & 0x5555555555555555UL; //put count of each 2 bits into those 2 bits
x = (x & 0x3333333333333333UL) +
((x >> 2) & 0x3333333333333333UL); //put count of each 4 bits into those 4 bits
x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FUL; //put count of each 8 bits into those 8 bits
return
(long) ((x * 0x0101010101010101UL) >>
56); //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ...
}
[Params(0x0F0F0F0F0F0F0F0FUL)] public ulong Input { get; set; }
//[Benchmark]
//public long BitCount()
//{
// long counter = 0;
// for (int i = 0; i < 10000; i++)
// {
// counter += BitCount(Input);
// }
// return counter;
//}
//[Benchmark]
//public long PopCount()
//{
// long counter = 0;
// for (int i = 0; i < 10000; i++)
// {
// counter += System.Runtime.Intrinsics.X86.Popcnt.PopCount(Input);
// }
// return counter;
//}
[Benchmark]
public long PopCountArray()
{
long counter = 0;
for (int i = 0; i < 1024; i++)
{
counter += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]);
}
return counter;
}
[Benchmark]
public long PopCountUnrolledSimple()
{
long longResult = 0;
for (int i = 0; i + 3 <= 1024; i += 3)
{
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]);
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 1]);
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 2]);
}
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[1024 - 1]);
return (int) longResult;
}
[Benchmark]
public long PopCountUnrolledOriginal()
{
long longResult = 0;
for (int i = 0; i + 3 <= 1024; i += 3)
{
var p1 = System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]);
var p2 = System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 1]);
var p3 = System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 2]);
longResult += p1 + p2 + p3;
}
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[1024 - 1]);
return (int)longResult;
}
[Benchmark]
public long PopCountUnrolledReuseVar()
{
long p1 = 0, p2 = 0, p3 = 0;
for (int i = 0; i + 3 <= 1024; i += 3)
{
p1 += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]);
p2 += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 1]);
p3 += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 2]);
}
return p1 + p2 + p3 + System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[1024 - 1]);
}
}
}
@Tornhoof
Copy link
Author

BenchmarkDotNet=v0.10.11, OS=Windows 10 Redstone 3 [1709, Fall Creators Update] (10.0.16299.125)
Processor=Intel Core i7-4790K CPU 4.00GHz (Haswell), ProcessorCount=8
Frequency=3906249 Hz, Resolution=256.0001 ns, Timer=TSC
.NET Core SDK=2.2.0-preview1-007813
  [Host]      : .NET Core 2.1.0-preview1-26013-05 (Framework 4.6.26013.03), 64bit RyuJIT
  Core2.1-x64 : .NET Core 2.1.0-preview1-26013-05 (Framework 4.6.26013.03), 64bit RyuJIT

Job=Core2.1-x64  Runtime=Core  Toolchain=.NET Core 2.1  
Method Input Mean Error StdDev Allocated
PopCountArray 1085102592571150095 507.1 ns 0.2760 ns 0.2447 ns 0 B
PopCountUnrolledSimple 1085102592571150095 473.3 ns 0.0240 ns 0.0200 ns 0 B
PopCountUnrolledOriginal 1085102592571150095 516.3 ns 0.0744 ns 0.0659 ns 0 B
PopCountUnrolledReuseVar 1085102592571150095 480.4 ns 0.0787 ns 0.0737 ns 0 B

@redknightlois
Copy link

@Tornhoof also copy the _Data into a local variable, you will see far better assembler.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment