Created
September 18, 2020 01:53
-
-
Save Const-me/9d1b36621e439fadf191847b0871e783 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.Numerics; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
namespace SimdTest | |
{ | |
static class Program | |
{ | |
/// <summary>
/// Benchmark entry point: forwards <paramref name="array"/> to the summation routine under test.
/// </summary>
/// <param name="array">Values to add up.</param>
/// <returns>Whatever the selected implementation returns for <paramref name="array"/>.</returns>
/// <remarks>
/// The attribute asks the JIT not to inline this method into its caller.
/// To measure the AVX2 variant instead, call sumUnsafeAvx2( array ) here.
/// </remarks>
[MethodImpl( MethodImplOptions.NoInlining )]
static int computeSum( int[] array ) => sumOriginal( array );
// Test result: only 4% win on my PC. | |
/// <summary>
/// Sums every element of <paramref name="array"/>, using AVX2 instructions for the bulk of the
/// data when the CPU supports them and a plain scalar loop for everything else.
/// </summary>
/// <param name="array">Values to add up. An empty array yields 0.</param>
/// <returns>The 32-bit sum of all elements; overflow wraps around on both the vector and the scalar path.</returns>
/// <remarks>
/// Without AVX2 support the vector section is skipped and the scalar loop processes the whole
/// array, so the method no longer throws PlatformNotSupportedException on such hardware.
/// </remarks>
[MethodImpl( MethodImplOptions.AggressiveInlining )]
static int sumUnsafeAvx2( int[] array )
{
    unsafe
    {
        fixed( int* sourcePointer = array )
        {
            int* pointer = sourcePointer;
            int* pointerEnd = sourcePointer + array.Length;
            int scalar = 0;

            if( Avx2.IsSupported )
            {
                // The vector loop consumes 16 integers (two 256-bit registers) per iteration,
                // so it stops at the last multiple of 16.
                int* pointerEndAligned = sourcePointer + ( array.Length - array.Length % 16 );

                // Two independent accumulators, one per loaded register.
                Vector256<int> sumLow = Vector256<int>.Zero;
                Vector256<int> sumHigh = sumLow;
                for( ; pointer < pointerEndAligned; pointer += 16 )
                {
                    var a = Avx.LoadVector256( pointer );
                    var b = Avx.LoadVector256( pointer + 8 );
                    sumLow = Avx2.Add( sumLow, a );
                    sumHigh = Avx2.Add( sumHigh, b );
                }

                // Horizontal reduction: 16 lanes -> 8 -> 4 -> 2 -> 1.
                sumLow = Avx2.Add( sumLow, sumHigh );
                Vector128<int> res4 = Sse2.Add( sumLow.GetLower(), sumLow.GetUpper() );
                // 0x4E swaps the two 64-bit halves, so lanes 0 and 1 receive lanes 2 and 3.
                res4 = Sse2.Add( res4, Sse2.Shuffle( res4, 0x4E ) );
                // Control 1 moves lane 1 into lane 0; only lane 0 is read afterwards.
                res4 = Sse2.Add( res4, Sse2.Shuffle( res4, 1 ) );
                scalar = res4.ToScalar();
            }

            // Remaining elements: the tail after the vector loop, or the whole array without AVX2.
            // Explicitly unchecked so that overflow wraps exactly like the vector additions do.
            for( ; pointer < pointerEnd; pointer++ )
                scalar = unchecked( scalar + *pointer );
            return scalar;
        }
    }
}
/// <summary>
/// Sums every element of <paramref name="array"/> using the portable <see cref="Vector{T}"/> type.
/// </summary>
/// <param name="array">Values to add up.</param>
/// <returns>The 32-bit sum of all elements.</returns>
[MethodImpl( MethodImplOptions.AggressiveInlining )]
static int sumOriginal( int[] array )
{
    // Reinterpret the array as a sequence of whole Vector<int> values.
    // Trailing elements that do not fill a complete vector are handled by the scalar loop below.
    Span<Vector<int>> blocks = MemoryMarshal.Cast<int, Vector<int>>( array );

    Vector<int> lanes = Vector<int>.Zero;
    foreach( Vector<int> block in blocks )
        lanes += block;

    // A dot product with the all-ones vector adds the lanes together.
    int total = Vector.Dot( lanes, Vector<int>.One );

    // Index of the first element the vector pass did not cover.
    int position = blocks.Length * Vector<int>.Count;
    while( position < array.Length )
    {
        total += array[ position ];
        position++;
    }
    return total;
}
/// <summary>
/// Overwrites every element of <paramref name="test"/> with a pseudo-random value in [0, 0x10000).
/// </summary>
/// <param name="test">Array to fill in place.</param>
/// <param name="seed">Seed for the generator; the same seed always produces the same contents.</param>
static void fillRandomVector( int[] test, int seed )
{
    // Exclusive upper bound of the generated values.
    const int upperBound = 0x10000;

    // Seeding explicitly makes the data reproducible, so that the results
    // of different summation algorithms can be compared against each other.
    Random generator = new Random( seed );

    int index = 0;
    while( index < test.Length )
    {
        test[ index ] = generator.Next( upperBound );
        index++;
    }
}
/// <summary>
/// Runs the benchmark: one warm-up call to computeSum, then one timed call on fresh data,
/// printing both results and the elapsed time to the console.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main( string[] args )
{
    const int elementCount = 1024 * 1024 * 64;
    int[] data = new int[ elementCount ];

    // Dry run. The first call makes the JIT compile the code, so the timed call below
    // runs already-compiled code and the measurement excludes compilation time.
    fillRandomVector( data, 0 );
    int warmupSum = computeSum( data );
    Console.WriteLine( "Warmup result {0}", warmupSum );

    // Timed run on data generated from a different seed.
    fillRandomVector( data, 11 );
    Stopwatch timer = new Stopwatch();
    timer.Start();
    int timedSum = computeSum( data );
    timer.Stop();

    double elapsedMs = timer.Elapsed.TotalMilliseconds;
    Console.WriteLine( "Spent {0}ms, result {1}", elapsedMs, timedSum );
}
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment