Last active
December 22, 2021 15:49
-
-
Save EgorBo/c8e8490ddd6f9a0d5b72c413ddd81d44 to your computer and use it in GitHub Desktop.
AVX-string-eq-inlined.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics; | |
using System.Runtime.Intrinsics.X86; | |
using BenchmarkDotNet.Attributes; | |
//| Method | headerName | Mean | Error | StdDev | Ratio | | |
//|----------------- |------------------- |---------:|----------:|----------:|------:| | |
//| StringEqauls | proxy-authenticate | 9.065 ns | 0.0030 ns | 0.0027 ns | 1.00 | | |
//| StringEqauls_AVX | proxy-authenticate | 1.464 ns | 0.0068 ns | 0.0053 ns | 0.16 | | |
//| StringEqauls_SSE | proxy-authenticate | 3.376 ns | 0.0010 ns | 0.0008 ns | 0.37 | | |
//| | | | | | | | |
//| StringEqauls | PROXY-AUTHENTICATE | 9.117 ns | 0.0520 ns | 0.0434 ns | 1.00 | | |
//| StringEqauls_AVX | PROXY-AUTHENTICATE | 1.468 ns | 0.0104 ns | 0.0087 ns | 0.16 | | |
//| StringEqauls_SSE | PROXY-AUTHENTICATE | 3.457 ns | 0.0519 ns | 0.0485 ns | 0.38 | | |
//| | | | | | | | |
//| StringEqauls | PROXY-AUTHENTICATX | 8.959 ns | 0.0144 ns | 0.0112 ns | 1.00 | | |
//| StringEqauls_AVX | PROXY-AUTHENTICATX | 1.485 ns | 0.0008 ns | 0.0007 ns | 0.17 | | |
//| StringEqauls_SSE | PROXY-AUTHENTICATX | 3.399 ns | 0.0014 ns | 0.0012 ns | 0.38 | | |
//| | | | | | | | |
//| StringEqauls | XROXY-AUTHENTICATE | 3.428 ns | 0.0024 ns | 0.0021 ns | 1.00 | | |
//| StringEqauls_AVX | XROXY-AUTHENTICATE | 1.223 ns | 0.0010 ns | 0.0009 ns | 0.36 | | |
//| StringEqauls_SSE | XROXY-AUTHENTICATE | 2.987 ns | 0.0009 ns | 0.0007 ns | 0.87 | | |
// Benchmarks comparing string.Equals(..., OrdinalIgnoreCase) against hand-written
// SIMD (AVX2 and SSE4.1) ASCII case-insensitive comparisons with the fixed
// 18-character header name "Proxy-Authenticate".
public class Benchmarks
{
    // The header name every benchmark compares its input against.
    private const string Expected = "Proxy-Authenticate";

    // Baseline: the framework's ordinal, case-insensitive comparison.
    [Benchmark(Baseline = true)]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    [Arguments("PROXY-AUTHENTICATX")] // late-out
    [Arguments("XROXY-AUTHENTICATE")] // early-out
    public bool StringEqauls(string headerName)
    {
        return string.Equals(headerName, Expected, StringComparison.OrdinalIgnoreCase);
    }

    // AVX2 variant: compares the input against two overlapping 256-bit vectors.
    // Returns false for null (like the baseline) and falls back to string.Equals
    // when AVX2 is not available on the current hardware.
    [Benchmark]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    [Arguments("PROXY-AUTHENTICATX")] // late-out
    [Arguments("XROXY-AUTHENTICATE")] // early-out
    public bool StringEqauls_AVX(string headerName)
    {
        if (!Avx2.IsSupported)
        {
            // The intrinsics below would throw PlatformNotSupportedException.
            return string.Equals(headerName, Expected, StringComparison.OrdinalIgnoreCase);
        }

        return object.ReferenceEquals(headerName, Expected) ||
            (headerName != null &&
             headerName.Length == Expected.Length &&
             CompareStringVsTwoVectors256AsciiIgnoreCase(headerName,
                 // Upper-cased "Proxy-Authenticate" split into two overlapping 32-byte vectors:
                 // chars 0..15 ("PROXY-AUTHENTICA") and chars 2..17 ("OXY-AUTHENTICATE").
                 Vector256.Create('P', 'R', 'O', 'X', 'Y', '-', 'A', 'U', 'T', 'H', 'E', 'N', 'T', 'I', 'C', 'A'),
                 Vector256.Create('O', 'X', 'Y', '-', 'A', 'U', 'T', 'H', 'E', 'N', 'T', 'I', 'C', 'A', 'T', 'E')));
    }

    // SSE variant: compares the input against three 128-bit vectors (the last one
    // overlaps the second). Returns false for null (like the baseline) and falls
    // back to string.Equals when SSE4.1 is not available on the current hardware.
    [Benchmark]
    [Arguments("proxy-authenticate")]
    [Arguments("PROXY-AUTHENTICATE")]
    [Arguments("PROXY-AUTHENTICATX")] // late-out
    [Arguments("XROXY-AUTHENTICATE")] // early-out
    public bool StringEqauls_SSE(string headerName)
    {
        if (!Sse41.IsSupported)
        {
            // Sse41.TestZ below would throw PlatformNotSupportedException.
            return string.Equals(headerName, Expected, StringComparison.OrdinalIgnoreCase);
        }

        return object.ReferenceEquals(headerName, Expected) ||
            (headerName != null &&
             headerName.Length == Expected.Length &&
             CompareStringVsThreeVectors128AsciiIgnoreCase(headerName,
                 // Upper-cased chars 0..7, 8..15 and 10..17 of "Proxy-Authenticate".
                 Vector128.Create('P', 'R', 'O', 'X', 'Y', '-', 'A', 'U'),
                 Vector128.Create('T', 'H', 'E', 'N', 'T', 'I', 'C', 'A'),
                 Vector128.Create('E', 'N', 'T', 'I', 'C', 'A', 'T', 'E')));
    }

    // Compares str1, ignoring ASCII case, against a pattern supplied as two vectors
    // of 16 UTF-16 code units each: str2_v1 is matched against the first 16 chars
    // of str1 and str2_v2 against its last 16 chars.
    //
    // Preconditions (not checked in release builds):
    //  - str1.Length >= 16, otherwise the loads read outside the string;
    //  - str2_v1 / str2_v2 must be UPPER case, because str1 is upper-cased before comparing;
    //  - AVX2 must be supported.
    private static bool CompareStringVsTwoVectors256AsciiIgnoreCase(string str1,
        // str2 must be upper case
        Vector256<ushort> str2_v1,
        Vector256<ushort> str2_v2)
    {
        System.Diagnostics.Debug.Assert(str1.Length >= 16, "str1 must hold at least 16 chars");

        ReadOnlySpan<char> span = str1.AsSpan();
        ref char spanStart = ref MemoryMarshal.GetReference(span);

        // First 16 chars and last 16 chars (the two windows may overlap).
        var v1 = Unsafe.ReadUnaligned<Vector256<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var v2 = Unsafe.ReadUnaligned<Vector256<ushort>>(
            ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, str1.Length - 16)));

        Vector256<ushort> v1up = ToUpper256(v1);
        Vector256<ushort> v2up = ToUpper256(v2);

        // XOR yields all-zero bits exactly when both operands are identical.
        Vector256<ushort> cmp1 = Avx2.Xor(v1up, str2_v1);
        Vector256<ushort> cmp2 = Avx2.Xor(v2up, str2_v2);

        // TestZ(x, x) is true when x has no bits set.
        return Avx.TestZ(cmp1, cmp1) &&
               Avx.TestZ(cmp2, cmp2);
    }

    // Compares str1, ignoring ASCII case, against a pattern supplied as three vectors
    // of 8 UTF-16 code units each: str2_v1 is matched against chars 0..7, str2_v2
    // against chars 8..15 and str2_v3 against the last 8 chars of str1.
    //
    // Preconditions (not checked in release builds):
    //  - str1.Length >= 16, otherwise the loads read outside the string;
    //  - the str2 vectors must be UPPER case, because str1 is upper-cased before comparing;
    //  - SSE4.1 must be supported.
    private static bool CompareStringVsThreeVectors128AsciiIgnoreCase(string str1,
        // str2 must be upper case
        Vector128<ushort> str2_v1,
        Vector128<ushort> str2_v2,
        Vector128<ushort> str2_v3)
    {
        System.Diagnostics.Debug.Assert(str1.Length >= 16, "str1 must hold at least 16 chars");

        ReadOnlySpan<char> span = str1.AsSpan();
        ref char spanStart = ref MemoryMarshal.GetReference(span);

        var v1 = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref spanStart));
        var v2 = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, 8)));
        // Last 8 chars; overlaps v2 whenever str1.Length < 24.
        var v3 = Unsafe.ReadUnaligned<Vector128<ushort>>(ref Unsafe.As<char, byte>(ref Unsafe.Add(ref spanStart, str1.Length - 8)));

        Vector128<ushort> v1up = ToUpper128(v1);
        Vector128<ushort> v2up = ToUpper128(v2);
        Vector128<ushort> v3up = ToUpper128(v3);

        // XOR yields all-zero bits exactly when both operands are identical.
        Vector128<ushort> cmp1 = Sse2.Xor(v1up, str2_v1);
        Vector128<ushort> cmp2 = Sse2.Xor(v2up, str2_v2);
        Vector128<ushort> cmp3 = Sse2.Xor(v3up, str2_v3);

        // TestZ(x, x) is true when x has no bits set.
        return Sse41.TestZ(cmp1, cmp1) &&
               Sse41.TestZ(cmp2, cmp2) &&
               Sse41.TestZ(cmp3, cmp3);
    }

    // Clears bit 0x20 of every BYTE whose value lies in 'a'..'z', leaving all other
    // bytes untouched. The vector is processed per byte, so the high byte of a
    // UTF-16 code unit is treated the same way as the low byte.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static Vector256<ushort> ToUpper256(Vector256<ushort> vec)
    {
        // Shift the range so that 'a'..'z' maps to sbyte -128..-103.
        Vector256<byte> rangeshift = Avx2.Subtract(vec.AsByte(), Vector256.Create((byte)('a' + 128)));
        // All-ones for bytes that are NOT in 'a'..'z' (signed compare against -103).
        Vector256<sbyte> nomodify = Avx2.CompareGreaterThan(rangeshift.AsSByte(), Vector256.Create((sbyte)(-128 + 25)));
        // 0x20 for bytes in 'a'..'z', 0 elsewhere.
        Vector256<byte> flip = Avx2.AndNot(nomodify.AsByte(), Vector256.Create((byte)0x20));
        return Avx2.Xor(vec, flip.AsUInt16());
    }

    // 128-bit counterpart of ToUpper256: clears bit 0x20 of every BYTE whose value
    // lies in 'a'..'z', leaving all other bytes untouched.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    static Vector128<ushort> ToUpper128(Vector128<ushort> vec)
    {
        // Shift the range so that 'a'..'z' maps to sbyte -128..-103.
        Vector128<byte> rangeshift = Sse2.Subtract(vec.AsByte(), Vector128.Create((byte)('a' + 128)));
        // All-ones for bytes that are NOT in 'a'..'z' (signed compare against -103).
        Vector128<sbyte> nomodify = Sse2.CompareGreaterThan(rangeshift.AsSByte(), Vector128.Create((sbyte)(-128 + 25)));
        // 0x20 for bytes in 'a'..'z', 0 elsewhere.
        Vector128<byte> flip = Sse2.AndNot(nomodify.AsByte(), Vector128.Create((byte)0x20));
        return Sse2.Xor(vec, flip.AsUInt16());
    }

    // Entry point: runs every benchmark declared in this class.
    public static void Main(string[] args)
    {
        BenchmarkDotNet.Running.BenchmarkRunner.Run<Benchmarks>();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment