Skip to content

Instantly share code, notes, and snippets.

@mattwarren
Last active July 25, 2023 00:20
Show Gist options
  • Save mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5 to your computer and use it in GitHub Desktop.
Save mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5 to your computer and use it in GitHub Desktop.
[Config(typeof(Config))]
public class Program
{
static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<Program>();
}
private class Config : ManualConfig
{
public Config()
{
Add(Job.Clr.WithLaunchCount(1));
Add(new GCDiagnoser());
}
}
[Params("Foo;Bar", "Foo;FooBar;Whatever", "Bar;blaat;foo", "blaat;foo;Bar", "foo;Bar;Blaat", "foo;FooBar;Blaat",
"Bar1;Bar2;Bar3;Bar4;Bar", "Bar1;Bar2;Bar3;Bar4;NoMatch", "Foo;FooBar;Whatever", "Some;Other;Really;Interesting;Tokens")]
public string Value { get; set; }
public string Match = "Bar";
private static char [] delimeter = new [] { ';' };
[Benchmark(Baseline = true)]
public bool StringSplit()
{
// To make is a fair comparision!
if (string.IsNullOrEmpty(Match)) return false;
if (string.IsNullOrEmpty(Value)) return false;
var tokens = Value.Split(delimeter);
foreach (var token in tokens)
{
if (token == Match)
return true;
}
return false;
}
[Benchmark]
public bool ContainsToken()
{
return ContainsToken(Value, Match);
}
[Benchmark]
public bool ContainsTokenXoofx()
{
return ContainsTokenXoofx(Value, Match);
}
[Benchmark]
public bool ContainsTokenXoofxUnsafe()
{
return ContainsTokenXoofxUnsafe(Value, Match);
}
[Benchmark]
public bool ContainsTokenHellBrick()
{
return ContainsTokenHellBrick(Value, Match);
}
[Benchmark]
public bool ContainsTokenMonty()
{
return ContainsTokenMonty(Value, Match);
}
/// <summary>
/// Code taken from https://twitter.com/Nick_Craver/status/722741298575319040
/// Checks whether the *entire* token exists, correctly handling seperators
/// (optional at start and end) so: "Foo;Bar" contains "Bar", but
/// "Foo;FooBar;Whatever" does not
/// </summary>
public static bool ContainsToken(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int lastIndex = -1, idx, endIndex = value.Length - token.Length, tokenLength = token.Length;
while ((idx = value.IndexOf(token, lastIndex + 1)) > lastIndex)
{
lastIndex = idx;
if ((idx == 0 || (value[idx - 1] == delimiter))
&& (idx == endIndex || (value[idx + tokenLength] == delimiter)))
{
return true;
}
}
return false;
}
// See https://gist.github.com/mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5#gistcomment-1757392
public static bool ContainsTokenXoofx(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int length = value.Length;
int tokenLength = token.Length;
for (int i = 0; i < length; i++)
{
for (int j = 0; j < tokenLength; j++, i++)
{
if (i >= length || value[i] != token[j])
{
goto next;
}
}
if ((i == length || (i < length && value[i] == delimiter)))
{
return true;
}
next:
i = value.IndexOf(delimiter, i);
if (i < 0)
{
break;
}
}
return false;
}
public unsafe static bool ContainsTokenXoofxUnsafe(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int length = value.Length;
int tokenLength = token.Length;
int i = 0;
fixed (char* pValue = value)
fixed (char* pToken = token)
while (true)
{
for (int j = 0; j < tokenLength; j++, i++)
{
if (i >= length || pValue[i] != pToken[j])
{
goto next;
}
}
if (i == length || pValue[i] == delimiter)
{
return true;
}
next:
i = value.IndexOf(delimiter, i) + 1;
if (i == 0)
{
break;
}
}
return false;
}
public static unsafe bool ContainsTokenHellBrick(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int length = value.Length;
int tokenLength = token.Length;
int currentTokenOffset = 0;
int maxTokenOffset = length - tokenLength;
while (currentTokenOffset <= maxTokenOffset)
{
int nextDelimiterOffset = value.IndexOf(delimiter, currentTokenOffset);
if (nextDelimiterOffset < 0)
nextDelimiterOffset = length;
if (tokenLength == nextDelimiterOffset - currentTokenOffset)
{
for (int i = 0; i < tokenLength; i++)
{
if (value[currentTokenOffset + i] != token[i])
goto next;
}
return true;
}
next:
currentTokenOffset = nextDelimiterOffset + 1;
}
return false;
}
public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
{
// this code assumes you're running x64
// it _works_ regardless, but some ifdef junk for x32
// would be handy if you care about ideal x32 perf
const int charsPerLong = sizeof(long) / sizeof(char); // 4
const int charsPerInt = sizeof(int) / sizeof(char); // 2
const int bytesPerChar = sizeof(char) / sizeof(byte); // 2
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
var valueLength = value.Length;
var tokenLength = token.Length;
if (tokenLength > valueLength) return false;
int tokenLongs;
bool tokenTrailingInt, tokenTrailingChar;
{
var remainingChars = tokenLength;
tokenLongs = remainingChars / charsPerLong;
tokenTrailingInt = (tokenLength & 0x02) != 0;
tokenTrailingChar = (tokenLength & 0x01) != 0;
}
var tokenByteLength = tokenLength * bytesPerChar;
unsafe
{
fixed (char* valuePtr = value, tokenPtr = token)
{
var curValuePtr = valuePtr;
var endValuePtr = valuePtr + valueLength;
while (true)
{
// 8-byte steps
long* tokenLongPtr = (long*)tokenPtr;
{
for (var i = 0; i < tokenLongs; i++)
{
var tokenLong = *tokenLongPtr;
var valueLong = *((long*)curValuePtr);
if (tokenLong == valueLong)
{
// we have a match, continue
tokenLongPtr++;
curValuePtr += charsPerLong;
continue;
}
else
{
goto advanceToNextDelimiter;
}
}
}
// can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
int* tokenIntPtr = (int*)tokenLongPtr;
if (tokenTrailingInt)
{
var tokenInt = *tokenIntPtr;
var valueInt = *((int*)curValuePtr);
if (tokenInt == valueInt)
{
// we have a match, continue
tokenIntPtr++;
curValuePtr += charsPerInt;
}
else
{
goto advanceToNextDelimiter;
}
}
// likewise, only 1 2-byte value or it'd be handled in the 4-byte check
char* tokenCharPtr = (char*)tokenIntPtr;
if (tokenTrailingChar)
{
var tokenChar = *tokenCharPtr;
var valueChar = *curValuePtr;
if (tokenChar == valueChar)
{
// we have a match, continue
tokenCharPtr++;
curValuePtr++;
}
else
{
goto advanceToNextDelimiter;
}
}
// at this point, the full text of token has been matched, so we need to check if the _next_ char
// in value is a delimiter or the end of value
if (curValuePtr == endValuePtr || *curValuePtr == delimiter)
{
return true;
}
// in this case, we found a string that starts with value, but is not value
// "Foo" in "FooBar" for example
// advance here to skip the rest of whatever's before
// the next delimiter
advanceToNextDelimiter:
// our own IndexOf()-esque functionality
// at the end of this curValuePtr points to the
// character _after_ the next delimiter in the string
//
// not using IndexOf() is to save us the trouble
// of tracking additional state, or doing extra work,
// to move between the index-into-a-string world and
// our constantly-advancing-pointers world
while (true)
{
var curVal = *curValuePtr;
if (curVal == delimiter)
{
// got one!
curValuePtr++;
if (curValuePtr == endValuePtr)
{
// but there wasn't anything after it
return false;
}
break;
}
curValuePtr++;
if (curValuePtr == endValuePtr)
{
// no more delimiters
return false;
}
}
// we only need to do this check after a delimiter is advanced
// because this check _itself_ guarantees that we won't
// exceed the end of value before we're done comparing
// against token
var remainingBytesInValue = ((byte*)endValuePtr) - ((byte*)curValuePtr); // we do this bytes to save an op
if (remainingBytesInValue < tokenByteLength)
{
// not possible for token to fit in string, fail early
return false;
}
}
}
}
}
}
@ioctlLR
Copy link

ioctlLR commented May 25, 2016

BTW, @kevin-montrose: I found that your algorithms won't support tokens larger than 6 chars anywhere but the beginning of the list. I haven't tried to debug them, though.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment