Skip to content

Instantly share code, notes, and snippets.

@xoofx
Created October 3, 2016 14:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xoofx/a00d16daac012d61acb950986d7c9e9e to your computer and use it in GitHub Desktop.
Save xoofx/a00d16daac012d61acb950986d7c9e9e to your computer and use it in GitHub Desktop.
FindFirstNonZeroByte (AVX)
// Ugly pure-ASM version of https://github.com/aspnet/KestrelHttpServer/pull/1138 and of the gist https://gist.github.com/benaadams/2dd3f99230757111e91915f638067a09
// It does not perform any better, though.
/// <summary>
/// Benchmark entry point: runs <c>FindFirstByteAVX</c> on the vector selected by <c>ByteSet</c>
/// and returns its result.
/// </summary>
[Benchmark]
public int TestFindFirstByteAVX()
{
    // Copy the selected vector into a local so it can be handed over by reference.
    Vector<byte> input = _vectors[ByteSet];
    int firstIndex = FindFirstByteAVX(ref input);
    return firstIndex;
}
/// <summary>
/// Placeholder for the hand-assembled AVX routine. The static constructor of this class
/// looks this method up by name and overwrites its native code with raw machine code,
/// so the managed body below only runs when that patch is not in effect.
/// </summary>
/// <param name="byteEquals">Vector to scan; the patched code reads 32 bytes from its address.</param>
/// <returns>
/// Per the reference listing in the static constructor: the index of the first non-zero
/// byte, or -1 when all 32 bytes are zero.
/// </returns>
// NOTE(review): NoInlining is presumably required so that every call goes through the
// patched entry point instead of an inlined copy of the placeholder body - confirm.
[MethodImpl(MethodImplOptions.NoInlining)]
public static int FindFirstByteAVX(ref Vector<byte> byteEquals)
{
// Only reached if the native code of this method has not been replaced.
throw new InvalidOperationException();
}
/// <summary>
/// Replaces the JIT-compiled body of <c>FindFirstByteAVX</c> with hand-assembled machine code.
/// </summary>
/// <remarks>
/// The patch is only installed when <c>Vector&lt;byte&gt;.Count</c> is 32, because the
/// injected code operates on a full 256-bit YMM register. Otherwise the method keeps its
/// managed placeholder body, which throws <see cref="InvalidOperationException"/>.
/// The injected code expects the address of the vector in RCX.
/// </remarks>
static unsafe FirstByteBenchmark()
{
    // The machine code below works on 32 bytes at once; installing it when vectors are
    // narrower would run instructions the current configuration is not known to support.
    // Skipping the patch (rather than throwing from a type initializer) leaves the
    // placeholder in place, which fails with a clear exception when called.
    if (Vector<byte>.Count != 32)
    {
        return;
    }

    var method = typeof(FirstByteBenchmark).GetMethod("FindFirstByteAVX", BindingFlags.Public | BindingFlags.Static);
    var methodHandle = method.MethodHandle;

    // Force JIT compilation first so the function pointer refers to the method's code.
    RuntimeHelpers.PrepareMethod(methodHandle);
    var functionPtr = methodHandle.GetFunctionPointer();

    // Reference implementation (C++ intrinsics) the machine code was generated from:
    //auto mask = _mm256_setzero_si256();
    //auto val = _mm256_loadu_si256((const __m256i*)byteEquals);
    //auto result = _mm256_cmpeq_epi8(val, mask);
    //auto a = ~_mm256_movemask_epi8(result);
    //auto index = _tzcnt_u32(a);
    //return index == 32 ? -1 : index;

    // Compiler listing (offset, encoding, instruction):
    // 0000f c5 f1 ef c9 vpxor xmm1, xmm1, xmm1
    // 00013 c5 f5 74 09 vpcmpeqb ymm1, ymm1, YMMWORD PTR [rcx]
    // 00017 c5 fd d7 c1 vpmovmskb eax, ymm1
    // 0001b f7 d0 not eax
    // 0001d 83 c9 ff or ecx, -1 ; ffffffffH
    // 00020 f3 0f bc c0 tzcnt eax, eax
    // 00024 83 f8 20 cmp eax, 32 ; 00000020H
    // 00027 0f 44 c1 cmove eax, ecx
    // 0002a c3 ret 0
    byte[] patch = new byte[]
    {
        0xc5, 0xf1, 0xef, 0xc9,
        0xc5, 0xf5, 0x74, 0x09,
        0xc5, 0xfd, 0xd7, 0xc1,
        0xf7, 0xd0,
        0x83, 0xc9, 0xff,
        0xf3, 0x0f, 0xbc, 0xc0,
        0x83, 0xf8, 0x20,
        0x0f, 0x44, 0xc1,
        0xc3
    };

    // Overwrite the start of the method's native code with the patch bytes.
    System.Runtime.InteropServices.Marshal.Copy(patch, 0, functionPtr, patch.Length);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment