Skip to content

Instantly share code, notes, and snippets.

@mgravell
Last active January 24, 2023 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mgravell/54aaac9e1325511cd43aa52246a6c808 to your computer and use it in GitHub Desktop.
Save mgravell/54aaac9e1325511cd43aa52246a6c808 to your computer and use it in GitHub Desktop.
ascii decode
// based on StringUtilities from dotnet/aspnetcore
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Jobs;
using BenchmarkDotNet.Running;
using System.Buffers;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics.X86;
using System.Runtime.Intrinsics;
using System.Diagnostics.CodeAnalysis;
BenchmarkRunner.Run<MyBenchmark>();
// manual test rig
//var obj = new MyBenchmark();
//obj.Setup();
//obj.Length = 5;
//Console.WriteLine(obj.ToUtf16_WithCheck());
//Console.WriteLine(obj.ToUtf16_NoCheck());
//Console.WriteLine(obj.TryGetAsciiString());
[SimpleJob(RuntimeMoniker.Net80)]
public class MyBenchmark
{
[GlobalSetup]
public void Setup()
{
headerBytes = new byte[1024];
Array.Fill(headerBytes, (byte)'a');
}
private byte[] headerBytes = Array.Empty<byte>();
private ReadOnlySpan<byte> Span => new ReadOnlySpan<byte>(headerBytes, 0, Length);
[Params(0, 5, 10, 20, 40, 80, 128, 256)]
public int Length { get; set; }
const int OperationsPerInvoke = 1024;
[Benchmark(Baseline = true, OperationsPerInvoke = OperationsPerInvoke)]
public string? TryGetAsciiString()
{
ReadOnlySpan<byte> span = Span;
string? last = null;
for (int i = 0; i < OperationsPerInvoke; i++)
{
last = StringUtilities.GetHeaderName(span);
}
return last;
}
[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public string? ToUtf16_WithCheck()
{
ReadOnlySpan<byte> span = Span;
string? last = null;
for (int i = 0; i < OperationsPerInvoke;i++)
{
last = StringUtilities.GetHeaderNameWithCheck(span);
}
return last;
}
[Benchmark(OperationsPerInvoke = OperationsPerInvoke)]
public string? ToUtf16_NoCheck()
{
ReadOnlySpan<byte> span = Span;
string? last = null;
for (int i = 0; i < OperationsPerInvoke; i++)
{
last = StringUtilities.GetHeaderNameNoCheck(span);
}
return last;
}
}
static class StringUtilities
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe string GetHeaderName(this ReadOnlySpan<byte> span)
{
if (span.IsEmpty)
{
return string.Empty;
}
fixed (byte* source = &MemoryMarshal.GetReference(span))
{
return string.Create<nint>(span.Length, (nint)source, s_getHeaderName);
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe string GetHeaderNameWithCheck(this ReadOnlySpan<byte> span)
{
if (span.IsEmpty)
{
return string.Empty;
}
fixed (byte* source = &MemoryMarshal.GetReference(span))
{
return string.Create<nint>(span.Length, (nint)source, s_getHeaderNameWithCheck);
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe string GetHeaderNameNoCheck(this ReadOnlySpan<byte> span)
{
if (span.IsEmpty)
{
return string.Empty;
}
fixed (byte* source = &MemoryMarshal.GetReference(span))
{
return string.Create<nint>(span.Length, (nint)source, s_getHeaderNameNoCheck);
}
}
private static readonly SpanAction<char, nint> s_getHeaderName = GetHeaderName,
s_getHeaderNameWithCheck = GetHeaderNameWithCheck, s_getHeaderNameNoCheck = GetHeaderNameNoCheck;
private static unsafe void GetHeaderName(Span<char> buffer, nint state)
{
fixed (char* output = &MemoryMarshal.GetReference(buffer))
{
// This version of AsciiUtilities returns null if there are any null (0 byte) characters
// in the string
if (!StringUtilities.TryGetAsciiString((byte*)state, output, buffer.Length))
{
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName);
}
}
}
private static unsafe void GetHeaderNameWithCheck(Span<char> buffer, nint state)
{
ReadOnlySpan<byte> source = new(state.ToPointer(), buffer.Length);
// TODO: this check should be done in Ascii.ToUtf16 as extra mode.
if (source.IndexOf((byte)0) >= 0)
{
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName);
}
OperationStatus status = System.Text.Ascii.ToUtf16(source, buffer, out _);
if (status != OperationStatus.Done)
{
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName);
}
}
private static unsafe void GetHeaderNameNoCheck(Span<char> buffer, nint state)
{
ReadOnlySpan<byte> source = new(state.ToPointer(), buffer.Length);
OperationStatus status = System.Text.Ascii.ToUtf16(source, buffer, out _);
if (status != OperationStatus.Done)
{
KestrelBadHttpRequestException.Throw(RequestRejectionReason.InvalidCharactersInHeaderName);
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool CheckBytesInAsciiRange(int check)
{
const int HighBits = unchecked((int)0x80808080);
return (((check - 0x01010101) | check) & HighBits) == 0;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] // Needs a push
private static bool CheckBytesInAsciiRange(Vector<sbyte> check)
{
// Vectorized byte range check, signed byte > 0 for 1-127
return Vector.GreaterThanAll(check, Vector<sbyte>.Zero);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool CheckBytesInAsciiRange(Vector128<sbyte> check, Vector128<sbyte> zero)
{
Debug.Assert(Sse2.IsSupported);
var mask = Sse2.CompareGreaterThan(check, zero);
return Sse2.MoveMask(mask) == 0xFFFF;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool CheckBytesInAsciiRange(Vector256<sbyte> check, Vector256<sbyte> zero)
{
Debug.Assert(Avx2.IsSupported);
var mask = Avx2.CompareGreaterThan(check, zero);
return (uint)Avx2.MoveMask(mask) == 0xFFFF_FFFF;
}
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe bool TryGetAsciiString(byte* input, char* output, int count)
{
Debug.Assert(input != null);
Debug.Assert(output != null);
var end = input + count;
Debug.Assert((long)end >= Vector256<sbyte>.Count);
// PERF: so the JIT can reuse the zero from a register
var zero = Vector128<sbyte>.Zero;
if (Sse2.IsSupported)
{
if (Avx2.IsSupported && input <= end - Vector256<sbyte>.Count)
{
var avxZero = Vector256<sbyte>.Zero;
do
{
var vector = Avx.LoadVector256(input).AsSByte();
if (!CheckBytesInAsciiRange(vector, avxZero))
{
return false;
}
var tmp0 = Avx2.UnpackLow(vector, avxZero);
var tmp1 = Avx2.UnpackHigh(vector, avxZero);
// Bring into the right order
var out0 = Avx2.Permute2x128(tmp0, tmp1, 0x20);
var out1 = Avx2.Permute2x128(tmp0, tmp1, 0x31);
Avx.Store((ushort*)output, out0.AsUInt16());
Avx.Store((ushort*)output + Vector256<ushort>.Count, out1.AsUInt16());
input += Vector256<sbyte>.Count;
output += Vector256<sbyte>.Count;
} while (input <= end - Vector256<sbyte>.Count);
if (input == end)
{
return true;
}
}
if (input <= end - Vector128<sbyte>.Count)
{
do
{
var vector = Sse2.LoadVector128(input).AsSByte();
if (!CheckBytesInAsciiRange(vector, zero))
{
return false;
}
var c0 = Sse2.UnpackLow(vector, zero).AsUInt16();
var c1 = Sse2.UnpackHigh(vector, zero).AsUInt16();
Sse2.Store((ushort*)output, c0);
Sse2.Store((ushort*)output + Vector128<ushort>.Count, c1);
input += Vector128<sbyte>.Count;
output += Vector128<sbyte>.Count;
} while (input <= end - Vector128<sbyte>.Count);
if (input == end)
{
return true;
}
}
}
else if (Vector.IsHardwareAccelerated)
{
while (input <= end - Vector<sbyte>.Count)
{
var vector = Unsafe.AsRef<Vector<sbyte>>(input);
if (!CheckBytesInAsciiRange(vector))
{
return false;
}
Vector.Widen(
vector,
out Unsafe.AsRef<Vector<short>>(output),
out Unsafe.AsRef<Vector<short>>(output + Vector<short>.Count));
input += Vector<sbyte>.Count;
output += Vector<sbyte>.Count;
}
if (input == end)
{
return true;
}
}
if (Environment.Is64BitProcess) // Use Intrinsic switch for branch elimination
{
// 64-bit: Loop longs by default
while (input <= end - sizeof(long))
{
var value = *(long*)input;
if (!CheckBytesInAsciiRange(value))
{
return false;
}
// BMI2 could be used, but this variant is faster on both Intel and AMD.
if (Sse2.X64.IsSupported)
{
var vecNarrow = Sse2.X64.ConvertScalarToVector128Int64(value).AsSByte();
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
Sse2.Store((ulong*)output, vecWide);
}
else
{
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
output[4] = (char)input[4];
output[5] = (char)input[5];
output[6] = (char)input[6];
output[7] = (char)input[7];
}
input += sizeof(long);
output += sizeof(long);
}
if (input <= end - sizeof(int))
{
var value = *(int*)input;
if (!CheckBytesInAsciiRange(value))
{
return false;
}
WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero);
input += sizeof(int);
output += sizeof(int);
}
}
else
{
// 32-bit: Loop ints by default
while (input <= end - sizeof(int))
{
var value = *(int*)input;
if (!CheckBytesInAsciiRange(value))
{
return false;
}
WidenFourAsciiBytesToUtf16AndWriteToBuffer(output, input, value, zero);
input += sizeof(int);
output += sizeof(int);
}
}
if (input <= end - sizeof(short))
{
if (!CheckBytesInAsciiRange(((short*)input)[0]))
{
return false;
}
output[0] = (char)input[0];
output[1] = (char)input[1];
input += sizeof(short);
output += sizeof(short);
}
if (input < end)
{
if (!CheckBytesInAsciiRange(((sbyte*)input)[0]))
{
return false;
}
output[0] = (char)input[0];
}
return true;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe void WidenFourAsciiBytesToUtf16AndWriteToBuffer(char* output, byte* input, int value, Vector128<sbyte> zero)
{
// BMI2 could be used, but this variant is faster on both Intel and AMD.
if (Sse2.X64.IsSupported)
{
var vecNarrow = Sse2.ConvertScalarToVector128Int32(value).AsSByte();
var vecWide = Sse2.UnpackLow(vecNarrow, zero).AsUInt64();
Unsafe.WriteUnaligned(output, Sse2.X64.ConvertToUInt64(vecWide));
}
else
{
output[0] = (char)input[0];
output[1] = (char)input[1];
output[2] = (char)input[2];
output[3] = (char)input[3];
}
}
[MethodImpl(MethodImplOptions.AggressiveInlining)] // Needs a push
private static bool CheckBytesInAsciiRange(long check)
{
const long HighBits = unchecked((long)0x8080808080808080L);
return (((check - 0x0101010101010101L) | check) & HighBits) == 0;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool CheckBytesInAsciiRange(short check)
{
const short HighBits = unchecked((short)0x8080);
return (((short)(check - 0x0101) | check) & HighBits) == 0;
}
private static bool CheckBytesInAsciiRange(sbyte check)
=> check > 0;
}
internal static class KestrelBadHttpRequestException
{
[StackTraceHidden, DoesNotReturn]
internal static void Throw(RequestRejectionReason reason)
{
throw new InvalidOperationException();
}
}
internal enum RequestRejectionReason
{
InvalidCharactersInHeaderName
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment