Created
April 21, 2016 04:36
-
-
Save kevin-montrose/c816b76f745172d6b9f63e3b8699cc24 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
public static bool ContainsTokenMonty(string value, string token, char delimiter = ';') | |
{ | |
// this code assumes you're running x64 | |
// it _works_ regardless, but some ifdef junk for x32 | |
// would be handy if you care about ideal x32 perf | |
const int charsPerLong = sizeof(long) / sizeof(char); // 4 | |
const int charsPerInt = sizeof(int) / sizeof(char); // 2 | |
const int bytesPerChar = sizeof(char) / sizeof(byte); // 2 | |
if (string.IsNullOrEmpty(token)) return false; | |
if (string.IsNullOrEmpty(value)) return false; | |
var valueLength = value.Length; | |
var tokenLength = token.Length; | |
if (tokenLength > valueLength) return false; | |
int tokenLongs; | |
bool tokenTrailingInt, tokenTrailingChar; | |
{ | |
var remainingChars = tokenLength; | |
tokenLongs = remainingChars / charsPerLong; | |
tokenTrailingInt = (tokenLength & 0x02) != 0; | |
tokenTrailingChar = (tokenLength & 0x01) != 0; | |
} | |
var tokenByteLength = tokenLength * bytesPerChar; | |
unsafe | |
{ | |
fixed (char* valuePtr = value, tokenPtr = token) | |
{ | |
var curValuePtr = valuePtr; | |
var endValuePtr = valuePtr + valueLength; | |
while (true) | |
{ | |
// 8-byte steps | |
long* tokenLongPtr = (long*)tokenPtr; | |
{ | |
for (var i = 0; i < tokenLongs; i++) | |
{ | |
var tokenLong = *tokenLongPtr; | |
var valueLong = *((long*)curValuePtr); | |
if (tokenLong == valueLong) | |
{ | |
// we have a match, continue | |
tokenLongPtr++; | |
curValuePtr += charsPerLong; | |
continue; | |
} | |
else | |
{ | |
goto advanceToNextDelimiter; | |
} | |
} | |
} | |
// can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride | |
int* tokenIntPtr = (int*)tokenLongPtr; | |
if (tokenTrailingInt) | |
{ | |
var tokenInt = *tokenIntPtr; | |
var valueInt = *((int*)curValuePtr); | |
if (tokenInt == valueInt) | |
{ | |
// we have a match, continue | |
tokenIntPtr++; | |
curValuePtr += charsPerInt; | |
} | |
else | |
{ | |
goto advanceToNextDelimiter; | |
} | |
} | |
// likewise, only 1 2-byte value or it'd be handled in the 4-byte check | |
char* tokenCharPtr = (char*)tokenIntPtr; | |
if (tokenTrailingChar) | |
{ | |
var tokenChar = *tokenCharPtr; | |
var valueChar = *curValuePtr; | |
if (tokenChar == valueChar) | |
{ | |
// we have a match, continue | |
tokenCharPtr++; | |
curValuePtr++; | |
} | |
else | |
{ | |
goto advanceToNextDelimiter; | |
} | |
} | |
// at this point, the full text of token has been matched, so we need to check if the _next_ char | |
// in value is a delimiter or the end of value | |
if (curValuePtr == endValuePtr || *curValuePtr == delimiter) | |
{ | |
return true; | |
} | |
// in this case, we found a string that starts with value, but is not value | |
// "Foo" in "FooBar" for example | |
// advance here to skip the rest of whatever's before | |
// the next delimiter | |
advanceToNextDelimiter: | |
// our own IndexOf()-esque functionality | |
// at the end of this curValuePtr points to the | |
// character _after_ the next delimiter in the string | |
// | |
// not using IndexOf() is to save us the trouble | |
// of tracking additional state, or doing extra work, | |
// to move between the index-into-a-string world and | |
// our constantly-advancing-pointers world | |
while(true) | |
{ | |
var curVal = *curValuePtr; | |
if(curVal == delimiter) | |
{ | |
// got one! | |
curValuePtr++; | |
if (curValuePtr == endValuePtr) | |
{ | |
// but there wasn't anything after it | |
return false; | |
} | |
break; | |
} | |
curValuePtr++; | |
if (curValuePtr == endValuePtr) | |
{ | |
// no more delimiters | |
return false; | |
} | |
} | |
// we only need to do this check after a delimiter is advanced | |
// because this check _itself_ guarantees that we won't | |
// exceed the end of value before we're done comparing | |
// against token | |
var remainingBytesInValue = ((byte*)endValuePtr) - ((byte*)curValuePtr); // we do this bytes to save an op | |
if (remainingBytesInValue < tokenByteLength) | |
{ | |
// not possible for token to fit in string, fail early | |
return false; | |
} | |
} | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment