Last active
January 24, 2023 16:48
-
-
Save mgravell/54aaac9e1325511cd43aa52246a6c808 to your computer and use it in GitHub Desktop.
ascii decode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// based on StringUtilities from dotnet/aspnetcore | |
// Licensed to the .NET Foundation under one or more agreements. | |
// The .NET Foundation licenses this file to you under the MIT license. | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Jobs; | |
using BenchmarkDotNet.Running; | |
using System.Buffers; | |
using System.Diagnostics; | |
using System.Numerics; | |
using System.Runtime.CompilerServices; | |
using System.Runtime.InteropServices; | |
using System.Runtime.Intrinsics.X86; | |
using System.Runtime.Intrinsics; | |
using System.Diagnostics.CodeAnalysis; | |
BenchmarkRunner.Run<MyBenchmark>(); | |
// manual test rig | |
//var obj = new MyBenchmark(); | |
//obj.Setup(); | |
//obj.Length = 5; | |
//Console.WriteLine(obj.ToUtf16_WithCheck()); | |
//Console.WriteLine(obj.ToUtf16_NoCheck()); | |
//Console.WriteLine(obj.TryGetAsciiString()); | |
[SimpleJob(RuntimeMoniker.Net80)] | |
public class MyBenchmark | |
{ | |
[GlobalSetup] | |
public void Setup() | |
{ | |
headerBytes = new byte[1024]; | |
Array.Fill(headerBytes, (byte)'a'); | |
} | |
private byte[] headerBytes = Array.Empty<byte>(); | |
private ReadOnlySpan<byte> Span => new ReadOnlySpan<byte>(headerBytes, 0, Length); | |
[Params(0, 5, 10, 20, 40, 80, 128, 256)] | |
public int Length { get; set; } | |
const int OperationsPerInvoke = 1024; | |
[Benchmark(Baseline = true, OperationsPerInvoke = OperationsPerInvoke)] | |
public string? TryGetAsciiString() | |
{ | |
ReadOnlySpan<byte> span = Span; | |
string? last = null; | |
for (int i = 0; i < OperationsPerInvoke; i++) | |
{ | |
last = StringUtilities.GetHeaderName(span); | |
} | |
return last; | |
} | |
[Benchmark(OperationsPerInvoke = OperationsPerInvoke)] | |
public string? ToUtf16_WithCheck() | |
{ | |
ReadOnlySpan<byte> span = Span; | |
string? last = null; | |
for (int i = 0; i < OperationsPerInvoke;i++) | |
{ | |
last = StringUtilities.GetHeaderNameWithCheck(span); | |
} | |
return last; | |
} | |
[Benchmark(OperationsPerInvoke = OperationsPerInvoke)] | |
public string? ToUtf16_NoCheck() | |
{ | |
ReadOnlySpan<byte> span = Span; | |
string? last = null; | |
for (int i = 0; i < OperationsPerInvoke; i++) | |
{ | |
last = StringUtilities.GetHeaderNameNoCheck(span); | |
} | |
return last; | |
} | |
} | |
static class StringUtilities | |
{ | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static unsafe string GetHeaderName(this ReadOnlySpan<byte> span) | |
{ | |
if (span.IsEmpty) | |
{ | |
return string.Empty; | |
} | |
fixed (byte* source = &MemoryMarshal.GetReference(span)) | |
{ | |
return string.Create<nint>(span.Length, (nint)source, s_getHeaderName); | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static unsafe string GetHeaderNameWithCheck(this ReadOnlySpan<byte> span) | |
{ | |
if (span.IsEmpty) | |
{ | |
return string.Empty; | |
} | |
fixed (byte* source = &MemoryMarshal.GetReference(span)) | |
{ | |
return string.Create<nint>(span.Length, (nint)source, s_getHeaderNameWithCheck); | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static unsafe string GetHeaderNameNoCheck(this ReadOnlySpan<byte> span) | |
{ | |
if (span.IsEmpty) | |
{ | |
return string.Empty; | |
} | |
fixed (byte* source = &MemoryMarshal.GetReference(span)) | |
{ | |
return string.Create<nint>(span.Length, (nint)source, s_getHeaderNameNoCheck); | |
} | |
} | |
private static readonly SpanAction<char, nint> s_getHeaderName = GetHeaderName, | |
s_getHeaderNameWithCheck = GetHeaderNameWithCheck, s_getHeaderNameNoCheck = GetHeaderNameNoCheck; | |
private static unsafe void GetHeaderName(Span<char> buffer, nint state) | |
{ | |
fixed (char* output = &MemoryMarshal.GetReference(buffer)) | |
{ | |
// This version of AsciiUtilities returns null if there are any null (0 byte) characters | |
// in the string | |
if (!StringUtilities.TryGetAsciiString((byte*)state, output, buffer.Length)) | |
{ | |
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName); | |
} | |
} | |
} | |
private static unsafe void GetHeaderNameWithCheck(Span<char> buffer, nint state) | |
{ | |
ReadOnlySpan<byte> source = new(state.ToPointer(), buffer.Length); | |
// TODO: this check should be done in Ascii.ToUtf16 as extra mode. | |
if (source.IndexOf((byte)0) >= 0) | |
{ | |
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName); | |
} | |
OperationStatus status = System.Text.Ascii.ToUtf16(source, buffer, out _); | |
if (status != OperationStatus.Done) | |
{ | |
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName); | |
} | |
} | |
private static unsafe void GetHeaderNameNoCheck(Span<char> buffer, nint state) | |
{ | |
ReadOnlySpan<byte> source = new(state.ToPointer(), buffer.Length); | |
OperationStatus status = System.Text.Ascii.ToUtf16(source, buffer, out _); | |
if (status != OperationStatus.Done) | |
{ | |
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName); | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static bool CheckBytesInAsciiRange(int check) | |
{ | |
const int HighBits = unchecked((int)0x80808080); | |
return (((check - 0x01010101) | check) & HighBits) == 0; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] // Needs a push | |
private static bool CheckBytesInAsciiRange(Vector<sbyte> check) | |
{ | |
// Vectorized byte range check, signed byte > 0 for 1-127 | |
return Vector.GreaterThanAll(check, Vector<sbyte>.Zero); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static bool CheckBytesInAsciiRange(Vector128<sbyte> check, Vector128<sbyte> zero) | |
{ | |
Debug.Assert(Sse2.IsSupported); | |
var mask = Sse2.CompareGreaterThan(check, zero); | |
return Sse2.MoveMask(mask) == 0xFFFF; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static bool CheckBytesInAsciiRange(Vector256<sbyte> check, Vector256<sbyte> zero) | |
{ | |
Debug.Assert(Avx2.IsSupported); | |
var mask = Avx2.CompareGreaterThan(check, zero); | |
return (uint)Avx2.MoveMask(mask) == 0xFFFF_FFFF; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveOptimization)] | |
public static unsafe bool TryGetAsciiString(byte* input, char* output, int count) | |
{ | |
Debug.Assert(input != null); | |
Debug.Assert(output != null); | |
var end = input + count; | |
Debug.Assert((long)end >= Vector256<sbyte>.Count); | |
// PERF: so the JIT can reuse the zero from a register | |
var zero = Vector128<sbyte>.Zero; | |
if (Sse2.IsSupported) | |
{ | |
if (Avx2.IsSupported && input <= end - Vector256<sbyte>.Count) | |
{ | |
var avxZero = Vector256<sbyte>.Zero; | |
do | |
{ | |
var vector = Avx.LoadVector256(input).AsSByte(); | |
if (!CheckBytesInAsciiRange(vector, avxZero)) | |
{ | |
return false; | |
} | |
var tmp0 = Avx2.UnpackLow(vector, avxZero); | |
var tmp1 = Avx2.UnpackHigh(vector, avxZero); | |
// Bring into the right order | |
var out0 = Avx2.Permute2x128(tmp0, tmp1, 0x20); | |
var out1 = Avx2.Permute2x128(tmp0, tmp1, 0x31); | |
Avx.Store((ushort*)output, out0.AsUInt16()); | |
Avx.Store((ushort*)output + Vector256<ushort>.Count, out1.AsUInt16()); | |
input += Vector256<sbyte>.Count; | |
output += Vector256<sbyte>.Count; | |
} while (input <= end - Vector256<sbyte>.Count); | |
if (input == end) | |
{ | |
return true; | |
} | |
} | |
if (input <= end - Vector128<sbyte>.Count) | |
{ | |
do | |
{ | |
var vector = Sse2.LoadVector128(input).AsSByte(); | |
if (!CheckBytesInAsciiRange(vector, zero)) | |
{ | |
return false; | |
} | |
var c0 = Sse2.UnpackLow(vector, zero).AsUInt16(); | |
var c1 = Sse2.UnpackHigh(vector, zero).AsUInt16(); | |
Sse2.Store((ushort*)output, c0); | |
Sse2.Store((ushort*)output + Vector128<ushort>.Count, c1); | |
input += Vector128<sbyte>.Count; | |
output += Vector128<sbyte>.Count; | |
} while (input <= end - Vector128<sbyte>.Count); | |
if (input == end) | |
{ | |
return true; | |
} | |
} | |
} | |
else if (Vector.IsHardwareAccelerated) | |
{ | |
while (input <= end - Vector<sbyte>.Count) | |
{ | |
var vector = Unsafe.AsRef<Vector<sbyte>>(input); | |
if (!CheckBytesInAsciiRange(vector)) | |
{ | |
return false; | |
} | |
Vector.Widen( | |
vector, | |
out Unsafe.AsRef<Vector<short>>(output), | |
out Unsafe.AsRef<Vector<short>>(output + Vector<short>.Count)); | |
input += Vector<sbyte>.Count; | |
output += Vector<sbyte>.Count; | |
} | |
if (input == end) | |
{ | |
return true; | |
} | |
} | |
if (Environment.Is64BitProcess) // Use Intrinsic switch for branch elimination | |
{ | |
// 64-bit: Loop longs by default | |
while (input <= end - sizeof(long)) | |
{ | |
var value = *(long*)input; | |
if (!CheckBytesInAsciiRange(value)) | |
{ | |
return false; | |
} | |
// BMI2 could be used, but this variant is faster on both Intel and AMD. | |
if (Sse2.X64.IsSupported) | |
{ | |
var vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte(); | |
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64(); | |
Sse2.Store((ulong*)output, vecWide); | |
} | |
else | |
{ | |
output[0] = (char)input[0]; | |
output[1] = (char)input[1]; | |
output[2] = (char)input[2]; | |
output[3] = (char)input[3]; | |
output[4] = (char)input[4]; | |
output[5] = (char)input[5]; | |
output[6] = (char)input[6]; | |
output[7] = (char)input[7]; | |
} | |
input += sizeof(long); | |
output += sizeof(long); | |
} | |
if (input <= end - sizeof(int)) | |
{ | |
var value = *(int*)input; | |
if (!CheckBytesInAsciiRange(value)) | |
{ | |
return false; | |
} | |
WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero); | |
input += sizeof(int); | |
output += sizeof(int); | |
} | |
} | |
else | |
{ | |
// 32-bit: Loop ints by default | |
while (input <= end - sizeof(int)) | |
{ | |
var value = *(int*)input; | |
if (!CheckBytesInAsciiRange(value)) | |
{ | |
return false; | |
} | |
WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero); | |
input += sizeof(int); | |
output += sizeof(int); | |
} | |
} | |
if (input <= end - sizeof(short)) | |
{ | |
if (!CheckBytesInAsciiRange(((short*)input)[0])) | |
{ | |
return false; | |
} | |
output[0] = (char)input[0]; | |
output[1] = (char)input[1]; | |
input += sizeof(short); | |
output += sizeof(short); | |
} | |
if (input < end) | |
{ | |
if (!CheckBytesInAsciiRange(((sbyte*)input)[0])) | |
{ | |
return false; | |
} | |
output[0] = (char)input[0]; | |
} | |
return true; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static unsafe void WidenFourAsciiBytesToUtf16AndWriteToBuffer(char* output, byte* input, int value, Vector128<sbyte> zero) | |
{ | |
// BMI2 could be used, but this variant is faster on both Intel and AMD. | |
if (Sse2.X64.IsSupported) | |
{ | |
var vecNarrow = Sse2.ConvertScalarToVector128Int32(value).AsSByte(); | |
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64(); | |
Unsafe.WriteUnaligned(output, Sse2.X64.ConvertToUInt64(vecWide)); | |
} | |
else | |
{ | |
output[0] = (char)input[0]; | |
output[1] = (char)input[1]; | |
output[2] = (char)input[2]; | |
output[3] = (char)input[3]; | |
} | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] // Needs a push | |
private static bool CheckBytesInAsciiRange(long check) | |
{ | |
const long HighBits = unchecked((long)0x8080808080808080L); | |
return (((check - 0x0101010101010101L) | check) & HighBits) == 0; | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static bool CheckBytesInAsciiRange(short check) | |
{ | |
const short HighBits = unchecked((short)0x8080); | |
return (((short)(check - 0x0101) | check) & HighBits) == 0; | |
} | |
private static bool CheckBytesInAsciiRange(sbyte check) | |
=> check > 0; | |
} | |
internal static class KestrelBadHttpRequestException | |
{ | |
[StackTraceHidden, DoesNotReturn] | |
internal static void Throw(RequestRejectionReason reason) | |
{ | |
throw new InvalidOperationException(); | |
} | |
} | |
internal enum RequestRejectionReason | |
{ | |
InvalidCharactersInHeaderName | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment