Skip to content

Instantly share code, notes, and snippets.

@ayende
Last active September 14, 2023 18:25
Show Gist options
  • Save ayende/ad5ae9c3a4519b116db920f2b9ea10cd to your computer and use it in GitHub Desktop.
Save ayende/ad5ae9c3a4519b116db920f2b9ea10cd to your computer and use it in GitHub Desktop.
/// <summary>
/// Removes every negative value from <paramref name="items"/> in place, packing the
/// retained values at the front of the span.
/// </summary>
/// <param name="items">Buffer to filter. It is overwritten with the retained values.</param>
/// <returns>
/// The number of retained (non-negative) values, i.e. the length of the valid prefix of
/// <paramref name="items"/> after the call.
/// </returns>
/// <remarks>
/// Elements at or beyond the returned count must not be relied on: the vector path stores
/// whole 256-bit vectors even when only some of their lanes are kept.
/// When AVX2 is unavailable, or the span holds fewer elements than one vector, the whole
/// span is processed by the scalar loop.
/// </remarks>
public static unsafe int FilterCmp_Avx_Next(Span<long> items)
{
    int len = items.Length;
    if (len <= 0)
    {
        return 0;
    }

    ref var permuteStart = ref Unsafe.AsRef(PermuteTable[0]);
    int outputIdx = 0;
    int i = 0;
    ref long output = ref items[0];

    // Avx2.PermuteVar8x32 throws PlatformNotSupportedException without AVX2, so only take
    // the vector path when the instruction set is actually available.
    if (Avx2.IsSupported && len >= Vector256<long>.Count)
    {
        // The loop loads one vector ahead of the one it is processing, so it must stop one
        // vector early to keep that look-ahead load inside the span. A dedicated bound is
        // used (instead of shrinking len) so the scalar loop below still sees the true
        // length and picks up everything the vector loop did not consume.
        int lookAheadLimit = len - Vector256<long>.Count;
        var next = Vector256.LoadUnsafe(ref output);
        for (; i + Vector256<long>.Count <= lookAheadLimit; i += Vector256<long>.Count)
        {
            var v = next;
            // Load the following vector before any store of this iteration; stores only
            // touch indexes below i + Count, so unread input is never overwritten.
            next = Vector256.LoadUnsafe(ref Unsafe.Add(ref output, i + Vector256<long>.Count));

            // One bit per 64-bit lane, set when that lane's sign bit is set (negative).
            var bits = v.ExtractMostSignificantBits();
            if (bits == 0)
            {
                // No negatives: keep the whole vector as-is.
                v.StoreUnsafe(ref Unsafe.Add(ref output, outputIdx));
                outputIdx += Vector256<long>.Count;
                continue;
            }

            // Some lanes are negative: look up the permutation for this sign mask.
            // NOTE(review): the offset assumes each PermuteTable entry is 8 Int32 lane
            // indexes (32 bytes) that move the kept lanes to the front in their original
            // order - confirm against the PermuteTable definition.
            var permute = Vector256.LoadUnsafe(ref Unsafe.Add(ref permuteStart, (int)bits * (sizeof(int) * 8))).AsInt32();
            var packed = Avx2.PermuteVar8x32(v.AsInt32(), permute).AsInt64();
            packed.StoreUnsafe(ref Unsafe.Add(ref output, outputIdx));
            // Only the non-negative lanes count; trailing lanes are overwritten later.
            outputIdx += Vector256<long>.Count - BitOperations.PopCount(bits);
        }
    }

    // Scalar tail: handles whatever the vector loop left over (including the last full
    // vector it only looked ahead at), or the entire span when the vector path was skipped.
    for (; i < len; i++)
    {
        long cur = Unsafe.Add(ref output, i);
        if (cur < 0)
        {
            continue;
        }

        Unsafe.Add(ref output, outputIdx++) = cur;
    }

    return outputIdx;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment