Skip to content

Instantly share code, notes, and snippets.

@VAllens
Created June 12, 2024 12:50
Show Gist options
  • Save VAllens/a4790264da85db3ab40e1b9320b341f9 to your computer and use it in GitHub Desktop.
Save VAllens/a4790264da85db3ab40e1b9320b341f9 to your computer and use it in GitHub Desktop.
namespace AllenCai;
/// <summary>
/// This is an imitation of the Microsoft.VisualBasic.CompilerServices.LikeOperator.LikeString method,
/// implementing matching that supports * and ? wildcards and support ignores case rules.<br />
/// The purpose of this implementation is to reduce memory allocation and improve performance.
/// </summary>
public static class ZeroMemAllocLikeOperator
{
/// <summary>
/// Supports * and ? wildcards, support case-insensitive matching.
/// </summary>
public static bool LikeString(string? content, string? pattern, bool ignoreCase = true)
{
if (content == null && pattern == null)
return true;
if (content == null || pattern == null)
return false;
ReadOnlySpan<char> patternSpan = pattern.AsSpan();
ReadOnlySpan<char> contentSpan = content.AsSpan();
char zeroOrMoreChars = '*';
char oneChar = '?';
// If the pattern is composed of a single asterisk *, there is no need to match, return true directly.
if (patternSpan.Length == 1)
{
ref readonly char patternItem = ref patternSpan[0];
if (patternItem == zeroOrMoreChars)
{
return true;
}
}
// If the length of the content to be matched is only 1, and the pattern is exactly a question mark ?, there is no need to match, return true directly.
if (contentSpan.Length == 1)
{
ref readonly char patternItem = ref patternSpan[0];
if (patternItem == oneChar)
{
return true;
}
}
// If the pattern is composed of multiple asterisks * and question marks ?, there is no need to match, return true directly.
int zeroOrMorePatternCount = 0;
int onePatternCount = 0;
for (int i = 0; i < patternSpan.Length; i++)
{
ref readonly char patternItem = ref patternSpan[i];
if (patternItem == zeroOrMoreChars)
{
zeroOrMorePatternCount++;
}
else if (patternItem == oneChar)
{
onePatternCount++;
}
}
if (zeroOrMorePatternCount + onePatternCount == patternSpan.Length)
{
// As long as there is one or more asterisks *, there is no need to care about the length of the content to be matched.
if (zeroOrMorePatternCount > 0)
{
return true;
}
// If there is no asterisk *, all are question marks ?, then check whether the length of the pattern composed of question marks ? is consistent with the length of the content to be matched. If it is consistent, there is no need to match, return true directly.
if (patternSpan.Length == contentSpan.Length)
{
return true;
}
}
return LikeStringCore(contentSpan, patternSpan, ref zeroOrMoreChars, ref oneChar, ignoreCase);
}
/// <summary>
/// Supports * and ? wildcards, support case-insensitive matching.
/// </summary>
public static bool LikeString(ReadOnlySpan<char> contentSpan, ReadOnlySpan<char> patternSpan, bool ignoreCase = true)
{
char zeroOrMoreChars = '*';
char oneChar = '?';
// If the pattern is composed of a single asterisk *, there is no need to match, return true directly.
if (patternSpan.Length == 1)
{
ref readonly char patternItem = ref patternSpan[0];
if (patternItem == zeroOrMoreChars)
{
return true;
}
}
// If the length of the content to be matched is only 1, and the pattern is exactly a question mark ?, there is no need to match, return true directly.
if (contentSpan.Length == 1)
{
ref readonly char patternItem = ref patternSpan[0];
if (patternItem == oneChar)
{
return true;
}
}
// If the pattern is composed of multiple asterisks * and question marks ?, there is no need to match, return true directly.
int zeroOrMorePatternCount = 0;
int onePatternCount = 0;
for (int i = 0; i < patternSpan.Length; i++)
{
ref readonly char patternItem = ref patternSpan[i];
if (patternItem == zeroOrMoreChars)
{
zeroOrMorePatternCount++;
}
else if (patternItem == oneChar)
{
onePatternCount++;
}
}
if (zeroOrMorePatternCount + onePatternCount == patternSpan.Length)
{
// As long as there is one or more asterisks *, there is no need to care about the length of the content to be matched.
if (zeroOrMorePatternCount > 0)
{
return true;
}
// If there is no asterisk *, all are question marks ?, then check whether the length of the pattern composed of question marks ? is consistent with the length of the content to be matched. If it is consistent, there is no need to match, return true directly.
if (patternSpan.Length == contentSpan.Length)
{
return true;
}
}
return LikeStringCore(contentSpan, patternSpan, ref zeroOrMoreChars, ref oneChar, ignoreCase);
}
private static bool LikeStringCore(ReadOnlySpan<char> contentSpan, ReadOnlySpan<char> patternSpan, ref char zeroOrMoreChars, ref char oneChar, bool ignoreCase = true)
{
// Traverse the pattern, match character by character.
int contentIndex = 0;
int patternIndex = 0;
while (contentIndex < contentSpan.Length && patternIndex < patternSpan.Length)
{
ref readonly char patternItem = ref patternSpan[patternIndex];
if (patternItem == zeroOrMoreChars)
{
// If the next character in the pattern is an asterisk *, then move patternIndex back until a character that is not an asterisk * is found.
while (true)
{
if (patternIndex < patternSpan.Length)
{
ref readonly char nextPatternItem = ref patternSpan[patternIndex];
if (nextPatternItem == zeroOrMoreChars)
{
patternIndex++;
continue;
}
}
break;
}
// If patternIndex has reached the end of the pattern, there is no need to match anymore, return true directly.
if (patternIndex == patternSpan.Length)
{
return true;
}
// If patternIndex has not reached the end of the pattern, then start matching from contentIndex.
while (contentIndex < contentSpan.Length)
{
if (LikeStringCore(contentSpan.Slice(contentIndex), patternSpan.Slice(patternIndex), ref zeroOrMoreChars, ref oneChar, ignoreCase))
{
return true;
}
contentIndex++;
}
return false;
}
if (patternItem == oneChar)
{
// If the next character in the pattern is a question mark ?, then match one character.
contentIndex++;
patternIndex++;
}
else
{
// If the next character in the pattern is not an asterisk *, nor a question mark ?, then match one character.
if (contentIndex >= contentSpan.Length)
{
return false;
}
ref readonly char contentItem = ref contentSpan[contentIndex];
if (ignoreCase)
{
if (char.ToUpperInvariant(contentItem) != char.ToUpperInvariant(patternItem))
{
return false;
}
}
else
{
if (contentItem != patternItem)
{
return false;
}
}
contentIndex++;
patternIndex++;
}
}
// If all the content is matched, and the pattern has not been traversed, then check whether the remaining patternItem are all asterisks *. If yes, return true, otherwise return false.
if (contentIndex == contentSpan.Length)
{
// If the next character in the pattern is an asterisk *, then move patternIndex back until a character that is not an asterisk * is found.
while (true)
{
if (patternIndex < patternSpan.Length)
{
ref readonly char nextPatternItem = ref patternSpan[patternIndex];
if (nextPatternItem == zeroOrMoreChars)
{
patternIndex++;
continue;
}
}
break;
}
return patternIndex == patternSpan.Length;
}
return false;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment