Created
October 3, 2016 14:19
-
-
Save xoofx/a00d16daac012d61acb950986d7c9e9e to your computer and use it in GitHub Desktop.
FindFirstNonZeroByte (AVX)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Ugly pure ASM version of https://github.com/aspnet/KestrelHttpServer/pull/1138 and gist https://gist.github.com/benaadams/2dd3f99230757111e91915f638067a09
// It does not perform any better, though.
/// <summary>
/// Benchmark entry point: runs <see cref="FindFirstByteAVX"/> against the
/// <c>_vectors[ByteSet]</c> input and returns its result.
/// </summary>
/// <returns>The value returned by <see cref="FindFirstByteAVX"/>.</returns>
[Benchmark]
public int TestFindFirstByteAVX()
{
    // The callee takes its argument by ref, so the element is first copied into a local.
    var vector = _vectors[ByteSet];
    return FindFirstByteAVX(ref vector);
}
/// <summary>
/// Placeholder whose compiled body is overwritten with hand-assembled machine code by the
/// static constructor of this class. The managed body below only runs if that overwrite
/// did not take effect.
/// </summary>
/// <param name="byteEquals">Vector to scan; the patched code reads it through the reference.</param>
/// <returns>
/// Never returns from the managed body. The replacement code is written to return the index of
/// the first non-zero byte, or -1 when every byte is zero.
/// </returns>
/// <exception cref="InvalidOperationException">The managed placeholder body was executed.</exception>
// NoInlining keeps this a real call, so callers reach the method's own (patched) code.
// NOTE(review): the compiled placeholder must be at least as long as the 28-byte patch - confirm.
[MethodImpl(MethodImplOptions.NoInlining)]
public static int FindFirstByteAVX(ref Vector<byte> byteEquals)
{
    throw new InvalidOperationException("FindFirstByteAVX has not been replaced by its native code patch.");
}
/// <summary>
/// Replaces the compiled body of <see cref="FindFirstByteAVX"/> with hand-assembled machine
/// code that scans 32 bytes at once for the first non-zero byte.
/// </summary>
/// <remarks>
/// When <c>Vector&lt;byte&gt;.Count</c> is not 32 the patch is skipped, so calling
/// <see cref="FindFirstByteAVX"/> runs its managed placeholder (which throws) instead of
/// executing 256-bit code against a narrower vector.
/// </remarks>
static unsafe FirstByteBenchmark()
{
    // The patch loads a full 32-byte YMMWORD through the argument pointer, so it is only
    // valid when Vector<byte> is 32 bytes wide.
    // NOTE(review): the instructions themselves also need CPU support (256-bit integer
    // compare/movemask and tzcnt); presumably implied by a 32-byte Vector<byte> - confirm.
    if (Vector<byte>.Count != 32)
    {
        return;
    }

    var method = typeof(FirstByteBenchmark).GetMethod("FindFirstByteAVX", BindingFlags.Public | BindingFlags.Static);
    var methodHandle = method.MethodHandle;

    // Prepare the method before asking for its entry point, so there is native code to overwrite.
    RuntimeHelpers.PrepareMethod(methodHandle);
    var functionPtr = methodHandle.GetFunctionPointer();
    // NOTE(review): assumes the returned pointer addresses the compiled body itself and that
    // the memory is writable - confirm for the target runtime.

    // Equivalent C intrinsics for the patch below:
    //auto mask = _mm256_setzero_si256();
    //auto val = _mm256_loadu_si256((const __m256i*)byteEquals);
    //auto result = _mm256_cmpeq_epi8(val, mask);
    //auto a = ~_mm256_movemask_epi8(result);
    //auto index = _tzcnt_u32(a);
    //return index == 32 ? -1 : index;

    // Assembler listing of the patch. The vector address is read from rcx.
    // NOTE(review): assumes the first argument arrives in rcx - confirm for the target platform.
    // 0000f c5 f1 ef c9 vpxor xmm1, xmm1, xmm1
    // 00013 c5 f5 74 09 vpcmpeqb ymm1, ymm1, YMMWORD PTR [rcx]
    // 00017 c5 fd d7 c1 vpmovmskb eax, ymm1
    // 0001b f7 d0 not eax
    // 0001d 83 c9 ff or ecx, -1 ; ffffffffH
    // 00020 f3 0f bc c0 tzcnt eax, eax
    // 00024 83 f8 20 cmp eax, 32 ; 00000020H
    // 00027 0f 44 c1 cmove eax, ecx
    // 0002a c3 ret 0
    byte[] patch = new byte[]
    {
        0xc5, 0xf1, 0xef, 0xc9,
        0xc5, 0xf5, 0x74, 0x09,
        0xc5, 0xfd, 0xd7, 0xc1,
        0xf7, 0xd0,
        0x83, 0xc9, 0xff,
        0xf3, 0x0f, 0xbc, 0xc0,
        0x83, 0xf8, 0x20,
        0x0f, 0x44, 0xc1,
        0xc3
    };

    // Copy the patch over the start of the method's native code, byte by byte.
    byte* target = (byte*)functionPtr;
    for (int i = 0; i < patch.Length; i++)
    {
        target[i] = patch[i];
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment