Skip to content

Instantly share code, notes, and snippets.

@kevin-montrose
Created April 21, 2016 04:36
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kevin-montrose/c816b76f745172d6b9f63e3b8699cc24 to your computer and use it in GitHub Desktop.
Save kevin-montrose/c816b76f745172d6b9f63e3b8699cc24 to your computer and use it in GitHub Desktop.
public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
{
// this code assumes you're running x64
// it _works_ regardless, but some ifdef junk for x32
// would be handy if you care about ideal x32 perf
const int charsPerLong = sizeof(long) / sizeof(char); // 4
const int charsPerInt = sizeof(int) / sizeof(char); // 2
const int bytesPerChar = sizeof(char) / sizeof(byte); // 2
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
var valueLength = value.Length;
var tokenLength = token.Length;
if (tokenLength > valueLength) return false;
int tokenLongs;
bool tokenTrailingInt, tokenTrailingChar;
{
var remainingChars = tokenLength;
tokenLongs = remainingChars / charsPerLong;
tokenTrailingInt = (tokenLength & 0x02) != 0;
tokenTrailingChar = (tokenLength & 0x01) != 0;
}
var tokenByteLength = tokenLength * bytesPerChar;
unsafe
{
fixed (char* valuePtr = value, tokenPtr = token)
{
var curValuePtr = valuePtr;
var endValuePtr = valuePtr + valueLength;
while (true)
{
// 8-byte steps
long* tokenLongPtr = (long*)tokenPtr;
{
for (var i = 0; i < tokenLongs; i++)
{
var tokenLong = *tokenLongPtr;
var valueLong = *((long*)curValuePtr);
if (tokenLong == valueLong)
{
// we have a match, continue
tokenLongPtr++;
curValuePtr += charsPerLong;
continue;
}
else
{
goto advanceToNextDelimiter;
}
}
}
// can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
int* tokenIntPtr = (int*)tokenLongPtr;
if (tokenTrailingInt)
{
var tokenInt = *tokenIntPtr;
var valueInt = *((int*)curValuePtr);
if (tokenInt == valueInt)
{
// we have a match, continue
tokenIntPtr++;
curValuePtr += charsPerInt;
}
else
{
goto advanceToNextDelimiter;
}
}
// likewise, only 1 2-byte value or it'd be handled in the 4-byte check
char* tokenCharPtr = (char*)tokenIntPtr;
if (tokenTrailingChar)
{
var tokenChar = *tokenCharPtr;
var valueChar = *curValuePtr;
if (tokenChar == valueChar)
{
// we have a match, continue
tokenCharPtr++;
curValuePtr++;
}
else
{
goto advanceToNextDelimiter;
}
}
// at this point, the full text of token has been matched, so we need to check if the _next_ char
// in value is a delimiter or the end of value
if (curValuePtr == endValuePtr || *curValuePtr == delimiter)
{
return true;
}
// in this case, we found a string that starts with value, but is not value
// "Foo" in "FooBar" for example
// advance here to skip the rest of whatever's before
// the next delimiter
advanceToNextDelimiter:
// our own IndexOf()-esque functionality
// at the end of this curValuePtr points to the
// character _after_ the next delimiter in the string
//
// not using IndexOf() is to save us the trouble
// of tracking additional state, or doing extra work,
// to move between the index-into-a-string world and
// our constantly-advancing-pointers world
while(true)
{
var curVal = *curValuePtr;
if(curVal == delimiter)
{
// got one!
curValuePtr++;
if (curValuePtr == endValuePtr)
{
// but there wasn't anything after it
return false;
}
break;
}
curValuePtr++;
if (curValuePtr == endValuePtr)
{
// no more delimiters
return false;
}
}
// we only need to do this check after a delimiter is advanced
// because this check _itself_ guarantees that we won't
// exceed the end of value before we're done comparing
// against token
var remainingBytesInValue = ((byte*)endValuePtr) - ((byte*)curValuePtr); // we do this bytes to save an op
if (remainingBytesInValue < tokenByteLength)
{
// not possible for token to fit in string, fail early
return false;
}
}
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment