Skip to content

Instantly share code, notes, and snippets.

@Const-me
Created September 18, 2020 01:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Const-me/9d1b36621e439fadf191847b0871e783 to your computer and use it in GitHub Desktop.
Save Const-me/9d1b36621e439fadf191847b0871e783 to your computer and use it in GitHub Desktop.
using System;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SimdTest
{
static class Program
{
/// <summary>
/// Single entry point the benchmark calls; forwards to the summing implementation under test.
/// </summary>
/// <remarks>
/// To benchmark the AVX2 version instead, forward to <c>sumUnsafeAvx2( array )</c>.
/// NoInlining keeps this a real call at the call site; presumably so the timed call runs the
/// same compiled code as the warm-up call (NOTE(review): confirm intent).
/// </remarks>
/// <param name="array">Values to add up.</param>
/// <returns>Whatever the selected implementation returns for <paramref name="array"/>.</returns>
[MethodImpl( MethodImplOptions.NoInlining )]
static int computeSum( int[] array ) => sumOriginal( array );
/// <summary>
/// Sums every element of <paramref name="array"/> with AVX2 intrinsics, 16 integers per loop
/// iteration, then adds the remaining elements with a scalar loop.
/// </summary>
/// <remarks>
/// Test result: only 4% win on my PC.
/// When AVX2 is not supported by the current CPU/runtime the vector path is skipped and the scalar
/// loop sums the whole array, instead of the intrinsics throwing PlatformNotSupportedException.
/// </remarks>
/// <param name="array">Values to add up; an empty array yields 0.</param>
/// <returns>The sum of all elements, wrapped around on 32-bit overflow.</returns>
[MethodImpl( MethodImplOptions.AggressiveInlining )]
static int sumUnsafeAvx2( int[] array )
{
    unsafe
    {
        fixed( int* sourcePointer = array )
        {
            int* pointer = sourcePointer;
            int* pointerEnd = sourcePointer + array.Length;
            int scalar = 0;

            if( Avx2.IsSupported )
            {
                // End of the longest prefix whose element count is a multiple of 16,
                // i.e. two 8-lane vectors per iteration. "Aligned" refers to the count, not to memory alignment.
                int* pointerEndAligned = sourcePointer + ( array.Length - array.Length % 16 );

                // Two separate accumulators, one per loaded vector.
                // NOTE(review): presumably to keep the two add chains independent - confirm.
                Vector256<int> sumLow = Vector256<int>.Zero;
                Vector256<int> sumHigh = Vector256<int>.Zero;
                for( ; pointer < pointerEndAligned; pointer += 16 )
                {
                    sumLow = Avx2.Add( sumLow, Avx.LoadVector256( pointer ) );
                    sumHigh = Avx2.Add( sumHigh, Avx.LoadVector256( pointer + 8 ) );
                }

                // Horizontal reduction: 16 lanes -> 8 -> 4 -> 2 -> 1.
                sumLow = Avx2.Add( sumLow, sumHigh );
                Vector128<int> res4 = Sse2.Add( sumLow.GetLower(), sumLow.GetUpper() );
                // 0x4E swaps the two 64-bit halves, so every lane ends up holding lane[i] + lane[i ^ 2].
                res4 = Sse2.Add( res4, Sse2.Shuffle( res4, 0x4E ) );
                // Control 1 puts lane 1 into lane 0; after the add, lane 0 holds the total of all four lanes.
                res4 = Sse2.Add( res4, Sse2.Shuffle( res4, 1 ) );
                scalar = res4.ToScalar();
            }

            // Scalar loop: the 0-15 leftover elements, or the entire array when AVX2 is unavailable.
            // unchecked keeps the wrap-around behavior identical to the SIMD adds even if the
            // project is compiled with overflow checking enabled.
            unchecked
            {
                for( ; pointer < pointerEnd; pointer++ )
                {
                    scalar += *pointer;
                }
            }
            return scalar;
        }
    }
}
/// <summary>
/// Sums every element of <paramref name="array"/> using the portable <c>Vector&lt;int&gt;</c> type,
/// then adds the elements that did not fill a whole vector one by one.
/// </summary>
/// <param name="array">Values to add up; an empty array yields 0.</param>
/// <returns>The sum of all elements.</returns>
[MethodImpl( MethodImplOptions.AggressiveInlining )]
static int sumOriginal( int[] array )
{
    // View the array as a run of whole Vector<int> blocks; trailing elements that
    // cannot fill a complete vector are not part of this view.
    Span<Vector<int>> blocks = MemoryMarshal.Cast<int, Vector<int>>( array );

    Vector<int> accumulator = Vector<int>.Zero;
    foreach( Vector<int> block in blocks )
    {
        accumulator += block;
    }

    // Dot product with an all-ones vector adds the accumulator's lanes together.
    int total = Vector.Dot( accumulator, Vector<int>.One );

    // Index of the first element not covered by the vector blocks.
    int tailStart = blocks.Length * Vector<int>.Count;
    for( int index = tailStart; index < array.Length; index++ )
    {
        total += array[ index ];
    }
    return total;
}
/// <summary>
/// Overwrites every element of <paramref name="test"/> with a pseudo-random value in [0, 0x10000).
/// </summary>
/// <remarks>
/// The generator is seeded explicitly, so the same seed always produces the same contents.
/// That repeatability is deliberate: it allows comparing results of different algorithms.
/// </remarks>
/// <param name="test">Array to fill in place.</param>
/// <param name="seed">Seed passed to the <c>Random</c> constructor.</param>
static void fillRandomVector( int[] test, int seed )
{
    const int upperBound = 0x10000;
    Random generator = new Random( seed );
    int index = 0;
    while( index < test.Length )
    {
        test[ index ] = generator.Next( upperBound );
        index++;
    }
}
/// <summary>
/// Benchmark driver: sums a 64Mi-element array once as a warm-up, refills it, then times a second sum
/// and prints the elapsed milliseconds together with the result.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main( string[] args )
{
    const int elementCount = 1024 * 1024 * 64;
    int[] data = new int[ elementCount ];

    // Warm-up pass. We don't want to measure the time the JIT takes to compile the code,
    // so the function is called once before the timed run; the second call reuses the compiled code.
    fillRandomVector( data, 0 );
    int warmupResult = computeSum( data );
    Console.WriteLine( "Warmup result {0}", warmupResult );

    // Timed pass over different data (another seed). Only computeSum is inside the stopwatch window.
    // Note: 64Mi values of up to 0xFFFF each can add up to more than int.MaxValue,
    // so the printed sum may have wrapped around (assuming default unchecked arithmetic).
    fillRandomVector( data, 11 );
    Stopwatch timer = Stopwatch.StartNew();
    int result = computeSum( data );
    timer.Stop();

    Console.WriteLine( "Spent {0}ms, result {1}", timer.Elapsed.TotalMilliseconds, result );
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment