-
-
Save MihaZupan/0b5561bcc4e40e054fd6f9be58b077b8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Buffers; | |
using System.Diagnostics.CodeAnalysis; | |
using System.Diagnostics; | |
using System.Text; | |
using System.Runtime.CompilerServices; | |
using BenchmarkDotNet.Attributes; | |
using BenchmarkDotNet.Running; | |
BenchmarkRunner.Run<PathStringBenchmark>(); | |
[MediumRunJob] | |
public class PathStringBenchmark | |
{ | |
// Inputs are based on existing PathString benchmarks in the aspnetcore repo | |
private const string TestPath = "/api/a%2Fb/c"; | |
private const string LongTestPath = "/thisMustBeAVeryLongPath/SoLongThatItCouldActuallyBeLargerToTheStackAllocThresholdValue/PathsShorterToThisAllocateLessOnHeapByUsingStackAllocation/api/a%20b"; | |
private const string LongValidTestPath = "/thisMustBeAVeryLongPath/SoLongThatItCouldActuallyBeLargerToTheStackAllocThresholdValue/PathsShorterToThisAllocateLessOnHeapByUsingStackAllocation/api/abcd"; | |
private const string LongTestPathEarlyPercent = "/t%20hisMustBeAVeryLongPath/SoLongButStillShorterToTheStackAllocThresholdValue/PathsShorterToThisAllocateLessOnHeap/api/a%20b"; | |
private OldPathString _old; | |
private NewPathString _new; | |
[Params( | |
nameof(TestPath), | |
nameof(LongTestPath), | |
nameof(LongValidTestPath), | |
nameof(LongTestPathEarlyPercent) | |
)] | |
public string Path; | |
[GlobalSetup] | |
public void Setup() | |
{ | |
string input = Path switch | |
{ | |
nameof(TestPath) => TestPath, | |
nameof(LongTestPath) => LongTestPath, | |
nameof(LongValidTestPath) => LongValidTestPath, | |
nameof(LongTestPathEarlyPercent) => LongTestPathEarlyPercent, | |
_ => throw new UnreachableException() | |
}; | |
_old = new OldPathString(input); | |
_new = new NewPathString(input); | |
} | |
[Benchmark] | |
public string Before() => _old.ToUriComponent(); | |
[Benchmark(Baseline = true)] | |
public string After() => _new.ToUriComponent(); | |
} | |
public readonly struct OldPathString | |
{ | |
public OldPathString(string? value) | |
{ | |
Value = value; | |
} | |
public string? Value { get; } | |
[MemberNotNullWhen(true, nameof(Value))] | |
public bool HasValue | |
{ | |
get { return !string.IsNullOrEmpty(Value); } | |
} | |
public string ToUriComponent() | |
{ | |
if (!HasValue) | |
{ | |
return string.Empty; | |
} | |
var value = Value; | |
var i = 0; | |
for (; i < value.Length; i++) | |
{ | |
if (!PathStringHelper.IsValidPathChar(value[i]) || PathStringHelper.IsPercentEncodedChar(value, i)) | |
{ | |
break; | |
} | |
} | |
if (i < value.Length) | |
{ | |
return ToEscapedUriComponent(value, i); | |
} | |
return value; | |
} | |
private static string ToEscapedUriComponent(string value, int i) | |
{ | |
StringBuilder? buffer = null; | |
var start = 0; | |
var count = i; | |
var requiresEscaping = false; | |
while (i < value.Length) | |
{ | |
var isPercentEncodedChar = PathStringHelper.IsPercentEncodedChar(value, i); | |
if (PathStringHelper.IsValidPathChar(value[i]) || isPercentEncodedChar) | |
{ | |
if (requiresEscaping) | |
{ | |
// the current segment requires escape | |
buffer ??= new StringBuilder(value.Length * 3); | |
buffer.Append(Uri.EscapeDataString(value.Substring(start, count))); | |
requiresEscaping = false; | |
start = i; | |
count = 0; | |
} | |
if (isPercentEncodedChar) | |
{ | |
count += 3; | |
i += 3; | |
} | |
else | |
{ | |
count++; | |
i++; | |
} | |
} | |
else | |
{ | |
if (!requiresEscaping) | |
{ | |
// the current segment doesn't require escape | |
buffer ??= new StringBuilder(value.Length * 3); | |
buffer.Append(value, start, count); | |
requiresEscaping = true; | |
start = i; | |
count = 0; | |
} | |
count++; | |
i++; | |
} | |
} | |
if (count == value.Length && !requiresEscaping) | |
{ | |
return value; | |
} | |
else | |
{ | |
if (count > 0) | |
{ | |
buffer ??= new StringBuilder(value.Length * 3); | |
if (requiresEscaping) | |
{ | |
buffer.Append(Uri.EscapeDataString(value.Substring(start, count))); | |
} | |
else | |
{ | |
buffer.Append(value, start, count); | |
} | |
} | |
return buffer?.ToString() ?? string.Empty; | |
} | |
} | |
} | |
public readonly struct NewPathString | |
{ | |
private static readonly SearchValues<char> s_validPathChars = | |
SearchValues.Create("!$&'()*+,-./0123456789:;=@ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"); | |
/// <summary> | |
/// Initialize the path string with a given value. This value must be in unescaped format. Use | |
/// PathString.FromUriComponent(value) if you have a path value which is in an escaped format. | |
/// </summary> | |
/// <param name="value">The unescaped path to be assigned to the Value property.</param> | |
public NewPathString(string? value) | |
{ | |
Value = value; | |
} | |
public string? Value { get; } | |
public string ToUriComponent() | |
{ | |
var value = Value; | |
if (string.IsNullOrEmpty(value)) | |
{ | |
return string.Empty; | |
} | |
var indexOfInvalidChar = value.AsSpan().IndexOfAnyExcept(s_validPathChars); | |
return indexOfInvalidChar < 0 | |
? value | |
: ToEscapedUriComponent(value, indexOfInvalidChar); | |
} | |
private static string ToEscapedUriComponent(string value, int i) | |
{ | |
StringBuilder? buffer = null; | |
var start = 0; | |
var count = i; | |
var requiresEscaping = false; | |
while (true) | |
{ | |
if ((uint)i >= (uint)value.Length) | |
{ | |
break; | |
} | |
var isPercentEncodedChar = false; | |
if (s_validPathChars.Contains(value[i]) || (isPercentEncodedChar = Uri.IsHexEncoding(value, i))) | |
{ | |
if (requiresEscaping) | |
{ | |
// the current segment requires escape | |
buffer ??= new StringBuilder(value.Length * 3); | |
buffer.Append(Uri.EscapeDataString(value.Substring(start, count))); | |
requiresEscaping = false; | |
start = i; | |
count = 0; | |
} | |
if (isPercentEncodedChar) | |
{ | |
count += 3; | |
i += 3; | |
} | |
else | |
{ | |
// We just saw a character we don't want to escape. It's likely there are more, do a vectorized search. | |
var charsToSkip = value.AsSpan(i).IndexOfAnyExcept(s_validPathChars); | |
if (charsToSkip < 0) | |
{ | |
// Only valid characters remain | |
count += value.Length - i; | |
break; | |
} | |
count += charsToSkip; | |
i += charsToSkip; | |
} | |
} | |
else | |
{ | |
if (!requiresEscaping) | |
{ | |
// the current segment doesn't require escape | |
buffer ??= new StringBuilder(value.Length * 3); | |
buffer.Append(value, start, count); | |
requiresEscaping = true; | |
start = i; | |
count = 0; | |
} | |
count++; | |
i++; | |
} | |
} | |
if (count == value.Length && !requiresEscaping) | |
{ | |
return value; | |
} | |
else | |
{ | |
Debug.Assert(count > 0); | |
Debug.Assert(buffer is not null); | |
if (requiresEscaping) | |
{ | |
buffer.Append(Uri.EscapeDataString(value.Substring(start, count))); | |
} | |
else | |
{ | |
buffer.Append(value, start, count); | |
} | |
return buffer.ToString(); | |
} | |
} | |
} | |
internal static class PathStringHelper | |
{ | |
// uint[] bits uses 1 cache line (Array info + 16 bytes) | |
// bool[] would use 3 cache lines (Array info + 128 bytes) | |
// So we use 128 bits rather than 128 bytes/bools | |
private static readonly uint[] ValidPathChars = { | |
0b_0000_0000__0000_0000__0000_0000__0000_0000, // 0x00 - 0x1F | |
0b_0010_1111__1111_1111__1111_1111__1101_0010, // 0x20 - 0x3F | |
0b_1000_0111__1111_1111__1111_1111__1111_1111, // 0x40 - 0x5F | |
0b_0100_0111__1111_1111__1111_1111__1111_1110, // 0x60 - 0x7F | |
}; | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static bool IsValidPathChar(char c) | |
{ | |
// Use local array and uint .Length compare to elide the bounds check on array access | |
var validChars = ValidPathChars; | |
var i = (int)c; | |
// Array is in chunks of 32 bits, so get offset by dividing by 32 | |
var offset = i >> 5; // i / 32; | |
// Significant bit position is the remainder of the above calc; i % 32 => i & 31 | |
var significantBit = 1u << (i & 31); | |
// Check offset in bounds and check if significant bit set | |
return (uint)offset < (uint)validChars.Length && | |
((validChars[offset] & significantBit) != 0); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
public static bool IsPercentEncodedChar(string str, int index) | |
{ | |
var len = (uint)str.Length; | |
if (str[index] == '%' && index < len - 2) | |
{ | |
return AreFollowingTwoCharsHex(str, index); | |
} | |
return false; | |
} | |
[MethodImpl(MethodImplOptions.NoInlining)] | |
private static bool AreFollowingTwoCharsHex(string str, int index) | |
{ | |
Debug.Assert(index < str.Length - 2); | |
var c1 = str[index + 1]; | |
var c2 = str[index + 2]; | |
return IsHexadecimalChar(c1) && IsHexadecimalChar(c2); | |
} | |
[MethodImpl(MethodImplOptions.AggressiveInlining)] | |
private static bool IsHexadecimalChar(char c) | |
{ | |
// Between 0 - 9 or uppercased between A - F | |
return (uint)(c - '0') <= 9 || (uint)((c & ~0x20) - 'A') <= ('F' - 'A'); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment