Skip to content

Instantly share code, notes, and snippets.

@MihaZupan
Created June 30, 2023 20:49
Show Gist options
  • Save MihaZupan/0b5561bcc4e40e054fd6f9be58b077b8 to your computer and use it in GitHub Desktop.
Save MihaZupan/0b5561bcc4e40e054fd6f9be58b077b8 to your computer and use it in GitHub Desktop.
using System.Buffers;
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics;
using System.Text;
using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
BenchmarkRunner.Run<PathStringBenchmark>();
[MediumRunJob]
public class PathStringBenchmark
{
// Inputs are based on existing PathString benchmarks in the aspnetcore repo
private const string TestPath = "/api/a%2Fb/c";
private const string LongTestPath = "/thisMustBeAVeryLongPath/SoLongThatItCouldActuallyBeLargerToTheStackAllocThresholdValue/PathsShorterToThisAllocateLessOnHeapByUsingStackAllocation/api/a%20b";
private const string LongValidTestPath = "/thisMustBeAVeryLongPath/SoLongThatItCouldActuallyBeLargerToTheStackAllocThresholdValue/PathsShorterToThisAllocateLessOnHeapByUsingStackAllocation/api/abcd";
private const string LongTestPathEarlyPercent = "/t%20hisMustBeAVeryLongPath/SoLongButStillShorterToTheStackAllocThresholdValue/PathsShorterToThisAllocateLessOnHeap/api/a%20b";
private OldPathString _old;
private NewPathString _new;
[Params(
nameof(TestPath),
nameof(LongTestPath),
nameof(LongValidTestPath),
nameof(LongTestPathEarlyPercent)
)]
public string Path;
[GlobalSetup]
public void Setup()
{
string input = Path switch
{
nameof(TestPath) => TestPath,
nameof(LongTestPath) => LongTestPath,
nameof(LongValidTestPath) => LongValidTestPath,
nameof(LongTestPathEarlyPercent) => LongTestPathEarlyPercent,
_ => throw new UnreachableException()
};
_old = new OldPathString(input);
_new = new NewPathString(input);
}
[Benchmark]
public string Before() => _old.ToUriComponent();
[Benchmark(Baseline = true)]
public string After() => _new.ToUriComponent();
}
public readonly struct OldPathString
{
public OldPathString(string? value)
{
Value = value;
}
public string? Value { get; }
[MemberNotNullWhen(true, nameof(Value))]
public bool HasValue
{
get { return !string.IsNullOrEmpty(Value); }
}
public string ToUriComponent()
{
if (!HasValue)
{
return string.Empty;
}
var value = Value;
var i = 0;
for (; i < value.Length; i++)
{
if (!PathStringHelper.IsValidPathChar(value[i]) || PathStringHelper.IsPercentEncodedChar(value, i))
{
break;
}
}
if (i < value.Length)
{
return ToEscapedUriComponent(value, i);
}
return value;
}
private static string ToEscapedUriComponent(string value, int i)
{
StringBuilder? buffer = null;
var start = 0;
var count = i;
var requiresEscaping = false;
while (i < value.Length)
{
var isPercentEncodedChar = PathStringHelper.IsPercentEncodedChar(value, i);
if (PathStringHelper.IsValidPathChar(value[i]) || isPercentEncodedChar)
{
if (requiresEscaping)
{
// the current segment requires escape
buffer ??= new StringBuilder(value.Length * 3);
buffer.Append(Uri.EscapeDataString(value.Substring(start, count)));
requiresEscaping = false;
start = i;
count = 0;
}
if (isPercentEncodedChar)
{
count += 3;
i += 3;
}
else
{
count++;
i++;
}
}
else
{
if (!requiresEscaping)
{
// the current segment doesn't require escape
buffer ??= new StringBuilder(value.Length * 3);
buffer.Append(value, start, count);
requiresEscaping = true;
start = i;
count = 0;
}
count++;
i++;
}
}
if (count == value.Length && !requiresEscaping)
{
return value;
}
else
{
if (count > 0)
{
buffer ??= new StringBuilder(value.Length * 3);
if (requiresEscaping)
{
buffer.Append(Uri.EscapeDataString(value.Substring(start, count)));
}
else
{
buffer.Append(value, start, count);
}
}
return buffer?.ToString() ?? string.Empty;
}
}
}
public readonly struct NewPathString
{
private static readonly SearchValues<char> s_validPathChars =
SearchValues.Create("!$&'()*+,-./0123456789:;=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~");
/// <summary>
/// Initialize the path string with a given value. This value must be in unescaped format. Use
/// PathString.FromUriComponent(value) if you have a path value which is in an escaped format.
/// </summary>
/// <param name="value">The unescaped path to be assigned to the Value property.</param>
public NewPathString(string? value)
{
Value = value;
}
public string? Value { get; }
public string ToUriComponent()
{
var value = Value;
if (string.IsNullOrEmpty(value))
{
return string.Empty;
}
var indexOfInvalidChar = value.AsSpan().IndexOfAnyExcept(s_validPathChars);
return indexOfInvalidChar < 0
? value
: ToEscapedUriComponent(value, indexOfInvalidChar);
}
private static string ToEscapedUriComponent(string value, int i)
{
StringBuilder? buffer = null;
var start = 0;
var count = i;
var requiresEscaping = false;
while (true)
{
if ((uint)i >= (uint)value.Length)
{
break;
}
var isPercentEncodedChar = false;
if (s_validPathChars.Contains(value[i]) || (isPercentEncodedChar = Uri.IsHexEncoding(value, i)))
{
if (requiresEscaping)
{
// the current segment requires escape
buffer ??= new StringBuilder(value.Length * 3);
buffer.Append(Uri.EscapeDataString(value.Substring(start, count)));
requiresEscaping = false;
start = i;
count = 0;
}
if (isPercentEncodedChar)
{
count += 3;
i += 3;
}
else
{
// We just saw a character we don't want to escape. It's likely there are more, do a vectorized search.
var charsToSkip = value.AsSpan(i).IndexOfAnyExcept(s_validPathChars);
if (charsToSkip < 0)
{
// Only valid characters remain
count += value.Length - i;
break;
}
count += charsToSkip;
i += charsToSkip;
}
}
else
{
if (!requiresEscaping)
{
// the current segment doesn't require escape
buffer ??= new StringBuilder(value.Length * 3);
buffer.Append(value, start, count);
requiresEscaping = true;
start = i;
count = 0;
}
count++;
i++;
}
}
if (count == value.Length && !requiresEscaping)
{
return value;
}
else
{
Debug.Assert(count > 0);
Debug.Assert(buffer is not null);
if (requiresEscaping)
{
buffer.Append(Uri.EscapeDataString(value.Substring(start, count)));
}
else
{
buffer.Append(value, start, count);
}
return buffer.ToString();
}
}
}
internal static class PathStringHelper
{
// uint[] bits uses 1 cache line (Array info + 16 bytes)
// bool[] would use 3 cache lines (Array info + 128 bytes)
// So we use 128 bits rather than 128 bytes/bools
private static readonly uint[] ValidPathChars = {
0b_0000_0000__0000_0000__0000_0000__0000_0000, // 0x00 - 0x1F
0b_0010_1111__1111_1111__1111_1111__1101_0010, // 0x20 - 0x3F
0b_1000_0111__1111_1111__1111_1111__1111_1111, // 0x40 - 0x5F
0b_0100_0111__1111_1111__1111_1111__1111_1110, // 0x60 - 0x7F
};
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsValidPathChar(char c)
{
// Use local array and uint .Length compare to elide the bounds check on array access
var validChars = ValidPathChars;
var i = (int)c;
// Array is in chunks of 32 bits, so get offset by dividing by 32
var offset = i >> 5; // i / 32;
// Significant bit position is the remainder of the above calc; i % 32 => i & 31
var significantBit = 1u << (i & 31);
// Check offset in bounds and check if significant bit set
return (uint)offset < (uint)validChars.Length &&
((validChars[offset] & significantBit) != 0);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsPercentEncodedChar(string str, int index)
{
var len = (uint)str.Length;
if (str[index] == '%' && index < len - 2)
{
return AreFollowingTwoCharsHex(str, index);
}
return false;
}
[MethodImpl(MethodImplOptions.NoInlining)]
private static bool AreFollowingTwoCharsHex(string str, int index)
{
Debug.Assert(index < str.Length - 2);
var c1 = str[index + 1];
var c2 = str[index + 2];
return IsHexadecimalChar(c1) && IsHexadecimalChar(c2);
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static bool IsHexadecimalChar(char c)
{
// Between 0 - 9 or uppercased between A - F
return (uint)(c - '0') <= 9 || (uint)((c & ~0x20) - 'A') <= ('F' - 'A');
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment