Last active
August 29, 2015 14:02
-
-
Save mburbea/a54d1528c5557c2af1b6 to your computer and use it in GitHub Desktop.
Playing with SIMD
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Linq; | |
using System.Runtime.InteropServices; | |
using System.Text; | |
using System.Threading.Tasks; | |
using System.Numerics; | |
using System.Runtime.CompilerServices; | |
using System.Collections; | |
using System.Reflection; | |
class Program | |
{ | |
/* Simd seems to be really quite a bit faster. Here is a simple benchmark | |
That simply processes a table over and over again to find the indexs of a comma byte. It tries various lengths | |
and various inputs. | |
It does this 10 times for each test set so that as to get enough time to actually make this interesting. | |
1_IndexOF is the indexOf on the array class and we can see it performs worse than a naive for loop, where as the | |
the method it calls ByteEqualityComparer is no slouch, but still this simple implementation of a simd memchr | |
performs lightening fast in comparison. SIMDs worse case is when it can't get an accelerated run. | |
0_IndexOf_SIMD | |
Average:41.18 | |
Total:2,512.00 | |
1_IndexOf | |
Average:74.26 | |
Total:4,530.00 | |
2_Naive | |
Average:62.31 | |
Total:3,801.00 | |
3_IndexOf | |
Average:55.87 | |
Total:3,408.00 | |
*/ | |
// static readonly int[] Debruijn64 = | |
// { | |
// 0, 1, 2, 56, 3, 32, 57, 46, 29, 4, 20, 33, 7, 58, 11, 47, | |
// 62, 30, 18, 5, 16, 21, 34, 23, 53, 8, 59, 36, 25, 12, 48, 39, | |
// 63, 55, 31, 45, 28, 19, 6, 10, 61, 17, 15, 22, 52, 35, 24, 38, | |
// 54, 44, 27, 9, 60, 14, 51, 37, 43, 26, 13, 50, 42, 49, 41, 40 | |
// }; | |
static readonly byte[] Debruijn64 = | |
{ | |
0, 0, 0, 7, 0, 4, 7, 5, 3, 0, 2, 4, 0, 7, 1, 5, | |
7, 3, 2, 0, 2, 2, 4, 2, 6, 1, 7, 4, 3, 1, 6, 4, | |
7, 6, 3, 5, 3, 2, 0, 1, 7, 2, 1, 2, 6, 4, 3, 4, | |
6, 5, 3, 1, 7, 1, 6, 4, 5, 3, 1, 6, 5, 6, 5, 5, | |
}; | |
const long DEBRUIJN_SEQ64 = 0x26752B916FC7B0D; | |
static Vector<byte> b = Vector<byte>.Zero; // force the initialization. | |
static IEnumerable<int> DelegateToEnumerable(Func<byte[],byte,int,int,int> finder,byte [] input,byte toFind) | |
{ | |
int ix=0; | |
while((ix= finder(input,toFind,ix,input.Length-ix))!=-1) | |
{ | |
yield return ix; | |
ix++; | |
} | |
} | |
static int Naive(byte[] array,byte value,int startIndex,int count) | |
{ | |
if (array == null) throw new ArgumentNullException("array"); | |
if ((startIndex < 0) || (startIndex > array.Length)) | |
{ | |
throw new ArgumentOutOfRangeException("startIndex"); | |
} | |
if ((count < 0) || (count > (array.Length - startIndex))) | |
{ | |
throw new ArgumentOutOfRangeException("count"); | |
} | |
for(int i=startIndex;count>0;i++,count--) | |
{ | |
if (array[i] == value) return i; | |
} | |
return -1; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
static int DebruijnFindByte(long v) | |
{ | |
return Debruijn64[((ulong)((v & -v) * DEBRUIJN_SEQ64)) >> 58]; | |
} | |
static int IndexOf_SIMD(byte[] array, byte value, int startIndex, int count) | |
{ | |
if (array == null) throw new ArgumentNullException("array"); | |
if ((startIndex < 0) || (startIndex > array.Length)) | |
{ | |
throw new ArgumentOutOfRangeException("startIndex"); | |
} | |
if ((count < 0) || (count > (array.Length - startIndex))) | |
{ | |
throw new ArgumentOutOfRangeException("count"); | |
} | |
Vector<byte> comparer = new Vector<byte>(value); | |
for (; count >= Vector<Byte>.Length; startIndex += Vector<Byte>.Length, count -= Vector<Byte>.Length) | |
{ | |
var vect = new Vector<byte>(array, startIndex); | |
var res = Vector.Equals<byte>(vect, comparer); | |
var vl = Vector<byte>.AsVectorInt64(res); | |
long v; | |
if ((v = vl[0]) != 0){} | |
else if ((v = vl[1]) != 0) startIndex += 8; | |
else continue; | |
return startIndex + DebruijnFindByte(v); | |
} | |
for (; count > 0; --count, startIndex++) | |
{ | |
if (array[startIndex] == value) | |
{ | |
return startIndex; | |
} | |
} | |
return -1; | |
} | |
static readonly Tuple<int, int, int>[] _tests = new[]{Tuple.Create(2,3000,3500) | |
,Tuple.Create( 2,01,10),Tuple.Create( 1,01,10),Tuple.Create( 4,01,10),Tuple.Create( 8,01,10) | |
,Tuple.Create( 64,01,10),Tuple.Create(128,01,10),Tuple.Create(256,01,10),Tuple.Create(512,01,10) | |
,Tuple.Create( 16,01,10),Tuple.Create( 1,10,20),Tuple.Create( 2,10,20),Tuple.Create( 4,10,20) | |
,Tuple.Create( 8,10,20),Tuple.Create( 16,10,20),Tuple.Create( 32,10,20),Tuple.Create( 64,10,20) | |
,Tuple.Create(128,10,20),Tuple.Create(256,10,20),Tuple.Create(480,10,20),Tuple.Create( 1,20,30) | |
,Tuple.Create( 2,20,30),Tuple.Create( 4,20,30),Tuple.Create( 8,20,30),Tuple.Create( 16,20,30) | |
,Tuple.Create( 32,20,30),Tuple.Create( 64,20,30),Tuple.Create(128,20,30),Tuple.Create(256,20,30) | |
,Tuple.Create(290,20,30),Tuple.Create( 1,30,40),Tuple.Create( 2,30,40),Tuple.Create( 4,30,40) | |
,Tuple.Create( 8,30,40),Tuple.Create( 16,30,40),Tuple.Create( 32,30,40),Tuple.Create( 64,30,40) | |
,Tuple.Create(128,30,40),Tuple.Create(210,30,40),Tuple.Create( 1,40,50),Tuple.Create( 2,40,50) | |
,Tuple.Create( 4,40,50),Tuple.Create( 8,40,50),Tuple.Create( 16,40,50),Tuple.Create( 32,40,50) | |
,Tuple.Create( 64,40,50),Tuple.Create(128,40,50),Tuple.Create(165,40,50),Tuple.Create( 32,01,10) | |
,Tuple.Create( 1,90,99),Tuple.Create( 2,90,99),Tuple.Create( 4,90,99),Tuple.Create( 8,90,99) | |
,Tuple.Create( 16,90,99),Tuple.Create( 32,90,99),Tuple.Create( 64,90,99),Tuple.Create( 80,90,99) | |
,Tuple.Create(4000, 01, 01),Tuple.Create(2000,01,05),Tuple.Create(1150,01,10),Tuple.Create(1,8000,8000),}; | |
static readonly Random rand = new Random(); | |
static readonly byte[] _bytes = initBytes(); | |
static readonly Dictionary<string, object>[] results = new Dictionary<string, object>[_tests.Length]; | |
static byte[] initBytes() | |
{ | |
var ret = new byte[8000]; | |
var l = new ulong[] { 0x0102030405060708, 0x090A010203040506, 0x0708090A01020304, 0x05060708090A0102, 0x030405060708090A, | |
0x0102030405060708, 0x090A010203040506, 0x0708090A01020304, 0x05060708090A0102, 0x030405060708090A, | |
0x0102030405060708, 0x090A010203040506, 0x0708090A01020304, 0x05060708090A0102, 0x030405060708090A, | |
0x0102030405}; | |
Buffer.BlockCopy(l, 0, ret, 0, 125); | |
for (int i = 0, j = 125; i < 6; i++, j *= 2) | |
Buffer.BlockCopy(ret, 0, ret, j, j); | |
return ret; | |
} | |
public static byte[] BuildRow(int numElems, int minwidth, int maxWidth) | |
{ | |
var l = Enumerable.Range(0, numElems).Select(x => minwidth + rand.Next(1 + maxWidth - minwidth)).ToArray(); | |
var b = new byte[l.Sum() + numElems - 1]; | |
var ix = 0; | |
Buffer.BlockCopy(_bytes, 0, b, ix, l[0]); | |
ix = l[0]; | |
for (var i = 1; i < l.Length; i++) | |
{ | |
var e = l[i]; | |
b[ix] = 0x44; | |
Buffer.BlockCopy(_bytes, 0, b, ix + 1, e); | |
ix += e + 1; | |
} | |
return b; | |
} | |
public static byte[][] BuildTable(Tuple<int, int, int> specification) | |
{ | |
return Enumerable.Range(0, 1000).Select(_ => BuildRow(specification.Item1, specification.Item2, specification.Item3)).ToArray(); | |
} | |
public static void RunTests() | |
{ | |
var sw = new Stopwatch(); | |
long k = 0; | |
for (var i = 0; i < _tests.Length; i++) | |
{ | |
var data = BuildTable(_tests[i]); | |
results[i] = new Dictionary<string, object>(); | |
//results[i["data"]=data; | |
results[i]["_numElem"] = _tests[i].Item1; | |
results[i]["_minwidth"] = _tests[i].Item2; | |
results[i]["_maxWidth"] = _tests[i].Item3; | |
Func<byte[], int> selector = b => b.Length; | |
results[i]["_minLength"] = data.Min(selector); | |
results[i]["_maxLength"] = data.Max(selector); | |
results[i]["_avgLength"] = data.Average(selector); | |
for (var j = 0; j < _candidates.Length; j++) | |
{ | |
GC.Collect(); | |
GC.WaitForPendingFinalizers(); | |
GC.Collect(); | |
sw.Start(); | |
foreach (var x in data) | |
{ | |
for (var z = 0; z < 10; z++) | |
k += DelegateToEnumerable(_candidates[j],x,0x44).Sum(r => (long)r); | |
} | |
sw.Stop(); | |
results[i][j.ToString()+"_"+_candidates[j].Method.Name] = sw.ElapsedMilliseconds; | |
sw.Reset(); | |
} | |
} | |
Console.WriteLine(k); | |
} | |
static string WriteTestResults() | |
{ | |
var sb = new StringBuilder(8000); | |
sb.AppendLine("_" + "," + string.Join<String>(",", results[0].Keys)); | |
for (var i = 0; i < results.Length; i++) | |
{ | |
sb.AppendLine(i + "," + string.Join<object>(",", results[i].Values)); | |
} | |
return sb.ToString(); | |
} | |
//static readonly Func<byte[], byte, int, int> memchr=Memchr; | |
//static readonly Func<byte[], byte, int, int> arrayIndexOf=Array.IndexOf<byte>; | |
static readonly Func<byte[], byte, int, int,int>[] _candidates = { | |
IndexOf_SIMD, | |
Array.IndexOf<byte>, | |
Naive, | |
Delegate.CreateDelegate(typeof(Func<byte[],byte,int,int,int>),EqualityComparer<Byte>.Default, | |
EqualityComparer<Byte>.Default.GetType().GetMethod( | |
"IndexOf", | |
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance) | |
) as Func<byte[],byte,int,int,int>}; | |
static void Main(string[] args) | |
{ | |
var testRow = BuildRow(32, 90, 100); | |
var test = DelegateToEnumerable(_candidates[1], testRow, 0x44).ToArray() as IStructuralEquatable; | |
foreach(var key in _candidates){ | |
if (!test.Equals(DelegateToEnumerable(key, testRow, 0x44).ToArray(), StructuralComparisons.StructuralEqualityComparer)) | |
throw new InvalidOperationException(key.Method.Name); | |
} | |
RunTests(); | |
for (var i = 0; i < _candidates.Length;i++) | |
{ | |
var key = i.ToString() + "_" + _candidates[i].Method.Name; | |
Console.WriteLine(key); | |
Console.WriteLine("Average:" + results.Average(x => Convert.ToInt64(x[key])).ToString("n")); | |
Console.WriteLine("Total:" + results.Sum(x => Convert.ToInt64(x[key])).ToString("n")); | |
} | |
System.IO.File.WriteAllText("testresults.csv", WriteTestResults()); | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment