Last active
December 23, 2017 15:29
-
-
Save Tornhoof/a35f0bc652aad038556d22dcdd478ef7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Runtime.CompilerServices; | |
using System.Text; | |
using BenchmarkDotNet.Attributes; | |
namespace PopCountBenchmark | |
{ | |
public class Benchmark | |
{ | |
private static readonly ulong[] _Data = GenerateData(); | |
private static ulong[] GenerateData() | |
{ | |
var result = new ulong[1024]; | |
var random = new Random(666); | |
for (int i = 0; i < result.Length; i++) | |
{ | |
result[i] = (uint) random.Next() | ((ulong) random.Next() << 32); | |
} | |
return result; | |
} | |
/// <summary> | |
/// see https://en.wikipedia.org/wiki/Hamming_weight | |
/// </summary> | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static long BitCount(ulong x) | |
{ | |
x -= (x >> 1) & 0x5555555555555555UL; //put count of each 2 bits into those 2 bits | |
x = (x & 0x3333333333333333UL) + | |
((x >> 2) & 0x3333333333333333UL); //put count of each 4 bits into those 4 bits | |
x = (x + (x >> 4)) & 0x0F0F0F0F0F0F0F0FUL; //put count of each 8 bits into those 8 bits | |
return | |
(long) ((x * 0x0101010101010101UL) >> | |
56); //returns left 8 bits of x + (x<<8) + (x<<16) + (x<<24) + ... | |
} | |
[Params(0x0F0F0F0F0F0F0F0FUL)] public ulong Input { get; set; } | |
//[Benchmark] | |
//public long BitCount() | |
//{ | |
// long counter = 0; | |
// for (int i = 0; i < 10000; i++) | |
// { | |
// counter += BitCount(Input); | |
// } | |
// return counter; | |
//} | |
//[Benchmark] | |
//public long PopCount() | |
//{ | |
// long counter = 0; | |
// for (int i = 0; i < 10000; i++) | |
// { | |
// counter += System.Runtime.Intrinsics.X86.Popcnt.PopCount(Input); | |
// } | |
// return counter; | |
//} | |
[Benchmark] | |
public long PopCountArray() | |
{ | |
long counter = 0; | |
for (int i = 0; i < 1024; i++) | |
{ | |
counter += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]); | |
} | |
return counter; | |
} | |
[Benchmark] | |
public long PopCountUnrolledSimple() | |
{ | |
long longResult = 0; | |
for (int i = 0; i + 3 <= 1024; i += 3) | |
{ | |
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]); | |
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 1]); | |
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 2]); | |
} | |
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[1024 - 1]); | |
return (int) longResult; | |
} | |
[Benchmark] | |
public long PopCountUnrolledOriginal() | |
{ | |
long longResult = 0; | |
for (int i = 0; i + 3 <= 1024; i += 3) | |
{ | |
var p1 = System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]); | |
var p2 = System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 1]); | |
var p3 = System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 2]); | |
longResult += p1 + p2 + p3; | |
} | |
longResult += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[1024 - 1]); | |
return (int)longResult; | |
} | |
[Benchmark] | |
public long PopCountUnrolledReuseVar() | |
{ | |
long p1 = 0, p2 = 0, p3 = 0; | |
for (int i = 0; i + 3 <= 1024; i += 3) | |
{ | |
p1 += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i]); | |
p2 += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 1]); | |
p3 += System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[i + 2]); | |
} | |
return p1 + p2 + p3 + System.Runtime.Intrinsics.X86.Popcnt.PopCount(_Data[1024 - 1]); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@Tornhoof also copy the
_Data
into a local variable, you will see far better assembler.