Skip to content

Instantly share code, notes, and snippets.

@mburbea
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mburbea/a54d1528c5557c2af1b6 to your computer and use it in GitHub Desktop.
Save mburbea/a54d1528c5557c2af1b6 to your computer and use it in GitHub Desktop.
Playing with SIMD
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading.Tasks;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Collections;
using System.Reflection;
class Program
{
/* Simd seems to be really quite a bit faster. Here is a simple benchmark
That simply processes a table over and over again to find the indexs of a comma byte. It tries various lengths
and various inputs.
It does this 10 times for each test set so that as to get enough time to actually make this interesting.
1_IndexOF is the indexOf on the array class and we can see it performs worse than a naive for loop, where as the
the method it calls ByteEqualityComparer is no slouch, but still this simple implementation of a simd memchr
performs lightening fast in comparison. SIMDs worse case is when it can't get an accelerated run.
0_IndexOf_SIMD
Average:41.18
Total:2,512.00
1_IndexOf
Average:74.26
Total:4,530.00
2_Naive
Average:62.31
Total:3,801.00
3_IndexOf
Average:55.87
Total:3,408.00
*/
// static readonly int[] Debruijn64 =
// {
// 0, 1, 2, 56, 3, 32, 57, 46, 29, 4, 20, 33, 7, 58, 11, 47,
// 62, 30, 18, 5, 16, 21, 34, 23, 53, 8, 59, 36, 25, 12, 48, 39,
// 63, 55, 31, 45, 28, 19, 6, 10, 61, 17, 15, 22, 52, 35, 24, 38,
// 54, 44, 27, 9, 60, 14, 51, 37, 43, 26, 13, 50, 42, 49, 41, 40
// };
static readonly byte[] Debruijn64 =
{
0, 0, 0, 7, 0, 4, 7, 5, 3, 0, 2, 4, 0, 7, 1, 5,
7, 3, 2, 0, 2, 2, 4, 2, 6, 1, 7, 4, 3, 1, 6, 4,
7, 6, 3, 5, 3, 2, 0, 1, 7, 2, 1, 2, 6, 4, 3, 4,
6, 5, 3, 1, 7, 1, 6, 4, 5, 3, 1, 6, 5, 6, 5, 5,
};
const long DEBRUIJN_SEQ64 = 0x26752B916FC7B0D;
static Vector<byte> b = Vector<byte>.Zero; // force the initialization.
static IEnumerable<int> DelegateToEnumerable(Func<byte[],byte,int,int,int> finder,byte [] input,byte toFind)
{
int ix=0;
while((ix= finder(input,toFind,ix,input.Length-ix))!=-1)
{
yield return ix;
ix++;
}
}
static int Naive(byte[] array,byte value,int startIndex,int count)
{
if (array == null) throw new ArgumentNullException("array");
if ((startIndex < 0) || (startIndex > array.Length))
{
throw new ArgumentOutOfRangeException("startIndex");
}
if ((count < 0) || (count > (array.Length - startIndex)))
{
throw new ArgumentOutOfRangeException("count");
}
for(int i=startIndex;count>0;i++,count--)
{
if (array[i] == value) return i;
}
return -1;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
static int DebruijnFindByte(long v)
{
return Debruijn64[((ulong)((v & -v) * DEBRUIJN_SEQ64)) >> 58];
}
static int IndexOf_SIMD(byte[] array, byte value, int startIndex, int count)
{
if (array == null) throw new ArgumentNullException("array");
if ((startIndex < 0) || (startIndex > array.Length))
{
throw new ArgumentOutOfRangeException("startIndex");
}
if ((count < 0) || (count > (array.Length - startIndex)))
{
throw new ArgumentOutOfRangeException("count");
}
Vector<byte> comparer = new Vector<byte>(value);
for (; count >= Vector<Byte>.Length; startIndex += Vector<Byte>.Length, count -= Vector<Byte>.Length)
{
var vect = new Vector<byte>(array, startIndex);
var res = Vector.Equals<byte>(vect, comparer);
var vl = Vector<byte>.AsVectorInt64(res);
long v;
if ((v = vl[0]) != 0){}
else if ((v = vl[1]) != 0) startIndex += 8;
else continue;
return startIndex + DebruijnFindByte(v);
}
for (; count > 0; --count, startIndex++)
{
if (array[startIndex] == value)
{
return startIndex;
}
}
return -1;
}
static readonly Tuple<int, int, int>[] _tests = new[]{Tuple.Create(2,3000,3500)
,Tuple.Create( 2,01,10),Tuple.Create( 1,01,10),Tuple.Create( 4,01,10),Tuple.Create( 8,01,10)
,Tuple.Create( 64,01,10),Tuple.Create(128,01,10),Tuple.Create(256,01,10),Tuple.Create(512,01,10)
,Tuple.Create( 16,01,10),Tuple.Create( 1,10,20),Tuple.Create( 2,10,20),Tuple.Create( 4,10,20)
,Tuple.Create( 8,10,20),Tuple.Create( 16,10,20),Tuple.Create( 32,10,20),Tuple.Create( 64,10,20)
,Tuple.Create(128,10,20),Tuple.Create(256,10,20),Tuple.Create(480,10,20),Tuple.Create( 1,20,30)
,Tuple.Create( 2,20,30),Tuple.Create( 4,20,30),Tuple.Create( 8,20,30),Tuple.Create( 16,20,30)
,Tuple.Create( 32,20,30),Tuple.Create( 64,20,30),Tuple.Create(128,20,30),Tuple.Create(256,20,30)
,Tuple.Create(290,20,30),Tuple.Create( 1,30,40),Tuple.Create( 2,30,40),Tuple.Create( 4,30,40)
,Tuple.Create( 8,30,40),Tuple.Create( 16,30,40),Tuple.Create( 32,30,40),Tuple.Create( 64,30,40)
,Tuple.Create(128,30,40),Tuple.Create(210,30,40),Tuple.Create( 1,40,50),Tuple.Create( 2,40,50)
,Tuple.Create( 4,40,50),Tuple.Create( 8,40,50),Tuple.Create( 16,40,50),Tuple.Create( 32,40,50)
,Tuple.Create( 64,40,50),Tuple.Create(128,40,50),Tuple.Create(165,40,50),Tuple.Create( 32,01,10)
,Tuple.Create( 1,90,99),Tuple.Create( 2,90,99),Tuple.Create( 4,90,99),Tuple.Create( 8,90,99)
,Tuple.Create( 16,90,99),Tuple.Create( 32,90,99),Tuple.Create( 64,90,99),Tuple.Create( 80,90,99)
,Tuple.Create(4000, 01, 01),Tuple.Create(2000,01,05),Tuple.Create(1150,01,10),Tuple.Create(1,8000,8000),};
static readonly Random rand = new Random();
static readonly byte[] _bytes = initBytes();
static readonly Dictionary<string, object>[] results = new Dictionary<string, object>[_tests.Length];
static byte[] initBytes()
{
var ret = new byte[8000];
var l = new ulong[] { 0x0102030405060708, 0x090A010203040506, 0x0708090A01020304, 0x05060708090A0102, 0x030405060708090A,
0x0102030405060708, 0x090A010203040506, 0x0708090A01020304, 0x05060708090A0102, 0x030405060708090A,
0x0102030405060708, 0x090A010203040506, 0x0708090A01020304, 0x05060708090A0102, 0x030405060708090A,
0x0102030405};
Buffer.BlockCopy(l, 0, ret, 0, 125);
for (int i = 0, j = 125; i < 6; i++, j *= 2)
Buffer.BlockCopy(ret, 0, ret, j, j);
return ret;
}
public static byte[] BuildRow(int numElems, int minwidth, int maxWidth)
{
var l = Enumerable.Range(0, numElems).Select(x => minwidth + rand.Next(1 + maxWidth - minwidth)).ToArray();
var b = new byte[l.Sum() + numElems - 1];
var ix = 0;
Buffer.BlockCopy(_bytes, 0, b, ix, l[0]);
ix = l[0];
for (var i = 1; i < l.Length; i++)
{
var e = l[i];
b[ix] = 0x44;
Buffer.BlockCopy(_bytes, 0, b, ix + 1, e);
ix += e + 1;
}
return b;
}
public static byte[][] BuildTable(Tuple<int, int, int> specification)
{
return Enumerable.Range(0, 1000).Select(_ => BuildRow(specification.Item1, specification.Item2, specification.Item3)).ToArray();
}
public static void RunTests()
{
var sw = new Stopwatch();
long k = 0;
for (var i = 0; i < _tests.Length; i++)
{
var data = BuildTable(_tests[i]);
results[i] = new Dictionary<string, object>();
//results[i["data"]=data;
results[i]["_numElem"] = _tests[i].Item1;
results[i]["_minwidth"] = _tests[i].Item2;
results[i]["_maxWidth"] = _tests[i].Item3;
Func<byte[], int> selector = b => b.Length;
results[i]["_minLength"] = data.Min(selector);
results[i]["_maxLength"] = data.Max(selector);
results[i]["_avgLength"] = data.Average(selector);
for (var j = 0; j < _candidates.Length; j++)
{
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
sw.Start();
foreach (var x in data)
{
for (var z = 0; z < 10; z++)
k += DelegateToEnumerable(_candidates[j],x,0x44).Sum(r => (long)r);
}
sw.Stop();
results[i][j.ToString()+"_"+_candidates[j].Method.Name] = sw.ElapsedMilliseconds;
sw.Reset();
}
}
Console.WriteLine(k);
}
static string WriteTestResults()
{
var sb = new StringBuilder(8000);
sb.AppendLine("_" + "," + string.Join<String>(",", results[0].Keys));
for (var i = 0; i < results.Length; i++)
{
sb.AppendLine(i + "," + string.Join<object>(",", results[i].Values));
}
return sb.ToString();
}
//static readonly Func<byte[], byte, int, int> memchr=Memchr;
//static readonly Func<byte[], byte, int, int> arrayIndexOf=Array.IndexOf<byte>;
static readonly Func<byte[], byte, int, int,int>[] _candidates = {
IndexOf_SIMD,
Array.IndexOf<byte>,
Naive,
Delegate.CreateDelegate(typeof(Func<byte[],byte,int,int,int>),EqualityComparer<Byte>.Default,
EqualityComparer<Byte>.Default.GetType().GetMethod(
"IndexOf",
System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Instance)
) as Func<byte[],byte,int,int,int>};
static void Main(string[] args)
{
var testRow = BuildRow(32, 90, 100);
var test = DelegateToEnumerable(_candidates[1], testRow, 0x44).ToArray() as IStructuralEquatable;
foreach(var key in _candidates){
if (!test.Equals(DelegateToEnumerable(key, testRow, 0x44).ToArray(), StructuralComparisons.StructuralEqualityComparer))
throw new InvalidOperationException(key.Method.Name);
}
RunTests();
for (var i = 0; i < _candidates.Length;i++)
{
var key = i.ToString() + "_" + _candidates[i].Method.Name;
Console.WriteLine(key);
Console.WriteLine("Average:" + results.Average(x => Convert.ToInt64(x[key])).ToString("n"));
Console.WriteLine("Total:" + results.Sum(x => Convert.ToInt64(x[key])).ToString("n"));
}
System.IO.File.WriteAllText("testresults.csv", WriteTestResults());
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment