kevin-montrose/ContainsTokenMonty.cs

## ContainsTokenMonty.cs
public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
{
    // this code assumes you're running x64
    //   it _works_ regardless, but some ifdef junk for x32
    //   would be handy if you care about ideal x32 perf

    const int charsPerLong = sizeof(long) / sizeof(char);   // 4
    const int charsPerInt = sizeof(int) / sizeof(char);     // 2
    const int bytesPerChar = sizeof(char) / sizeof(byte);   // 2

    if (string.IsNullOrEmpty(token)) return false;
    if (string.IsNullOrEmpty(value)) return false;

    var valueLength = value.Length;
    var tokenLength = token.Length;

    if (tokenLength > valueLength) return false;

    int tokenLongs;
    bool tokenTrailingInt, tokenTrailingChar;
    {
        var remainingChars = tokenLength;
        tokenLongs = remainingChars / charsPerLong;
        tokenTrailingInt = (tokenLength & 0x02) != 0;
        tokenTrailingChar = (tokenLength & 0x01) != 0;
    }

    var tokenByteLength = tokenLength * bytesPerChar;

    unsafe
    {
        fixed (char* valuePtr = value, tokenPtr = token)
        {
            var curValuePtr = valuePtr;
            var endValuePtr = valuePtr + valueLength;

            while (true)
            {
                // 8-byte steps
                long* tokenLongPtr = (long*)tokenPtr;
                {
                    for (var i = 0; i < tokenLongs; i++)
                    {
                        var tokenLong = *tokenLongPtr;

                        var valueLong = *((long*)curValuePtr);

                        if (tokenLong == valueLong)
                        {
                            // we have a match, continue
                            tokenLongPtr++;
                            curValuePtr += charsPerLong;
                            continue;
                        }
                        else
                        {
                            goto advanceToNextDelimiter;
                        }
                    }
                }

                // can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
                int* tokenIntPtr = (int*)tokenLongPtr;
                if (tokenTrailingInt)
                {
                    var tokenInt = *tokenIntPtr;

                    var valueInt = *((int*)curValuePtr);

                    if (tokenInt == valueInt)
                    {
                        // we have a match, continue
                        tokenIntPtr++;
                        curValuePtr += charsPerInt;
                    }
                    else
                    {
                        goto advanceToNextDelimiter;
                    }
                }

                // likewise, only 1 2-byte value or it'd be handled in the 4-byte check
                char* tokenCharPtr = (char*)tokenIntPtr;
                if (tokenTrailingChar)
                {
                    var tokenChar = *tokenCharPtr;

                    var valueChar = *curValuePtr;

                    if (tokenChar == valueChar)
                    {
                        // we have a match, continue
                        tokenCharPtr++;
                        curValuePtr++;
                    }
                    else
                    {
                        goto advanceToNextDelimiter;
                    }
                }

                // at this point, the full text of token has been matched, so we need to check if the _next_ char
                //    in value is a delimiter or the end of value
                if (curValuePtr == endValuePtr || *curValuePtr == delimiter)
                {
                    return true;
                }

            // in this case, we found a string that starts with value, but is not value
            //    "Foo" in "FooBar" for example


            // advance here to skip the rest of whatever's before
            //   the next delimiter
            advanceToNextDelimiter:
                // our own IndexOf()-esque functionality
                //   at the end of this curValuePtr points to the
                //   character _after_ the next delimiter in the string
                //
                // not using IndexOf() is to save us the trouble
                //   of tracking additional state, or doing extra work,
                //   to move between the index-into-a-string world and
                //   our constantly-advancing-pointers world
                while(true)
                {
                    var curVal = *curValuePtr;
                    if(curVal == delimiter)
                    {
                        // got one!
                        curValuePtr++;

                        if (curValuePtr == endValuePtr)
                        {
                            // but there wasn't anything after it
                            return false;
                        }
                        break;
                    }

                    curValuePtr++;

                    if (curValuePtr == endValuePtr)
                    {
                        // no more delimiters
                        return false;
                    }
                }

                // we only need to do this check after a delimiter is advanced
                //    because this check _itself_ guarantees that we won't
                //    exceed the end of value before we're done comparing
                //    against token
                var remainingBytesInValue = ((byte*)endValuePtr) - ((byte*)curValuePtr);    // we do this bytes to save an op
                if (remainingBytesInValue < tokenByteLength)
                {
                    // not possible for token to fit in string, fail early
                    return false;
                }
            }
        }
    }
}
	public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
	{
	// this code assumes you're running x64
	// it _works_ regardless, but some ifdef junk for x32
	// would be handy if you care about ideal x32 perf

	const int charsPerLong = sizeof(long) / sizeof(char); // 4
	const int charsPerInt = sizeof(int) / sizeof(char); // 2
	const int bytesPerChar = sizeof(char) / sizeof(byte); // 2

	if (string.IsNullOrEmpty(token)) return false;
	if (string.IsNullOrEmpty(value)) return false;

	var valueLength = value.Length;
	var tokenLength = token.Length;

	if (tokenLength > valueLength) return false;

	int tokenLongs;
	bool tokenTrailingInt, tokenTrailingChar;
	{
	var remainingChars = tokenLength;
	tokenLongs = remainingChars / charsPerLong;
	tokenTrailingInt = (tokenLength & 0x02) != 0;
	tokenTrailingChar = (tokenLength & 0x01) != 0;
	}

	var tokenByteLength = tokenLength * bytesPerChar;

	unsafe
	{
	fixed (char* valuePtr = value, tokenPtr = token)
	{
	var curValuePtr = valuePtr;
	var endValuePtr = valuePtr + valueLength;

	while (true)
	{
	// 8-byte steps
	long* tokenLongPtr = (long*)tokenPtr;
	{
	for (var i = 0; i < tokenLongs; i++)
	{
	var tokenLong = *tokenLongPtr;

	var valueLong = ((long)curValuePtr);

	if (tokenLong == valueLong)
	{
	// we have a match, continue
	tokenLongPtr++;
	curValuePtr += charsPerLong;
	continue;
	}
	else
	{
	goto advanceToNextDelimiter;
	}
	}
	}

	// can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
	int* tokenIntPtr = (int*)tokenLongPtr;
	if (tokenTrailingInt)
	{
	var tokenInt = *tokenIntPtr;

	var valueInt = ((int)curValuePtr);

	if (tokenInt == valueInt)
	{
	// we have a match, continue
	tokenIntPtr++;
	curValuePtr += charsPerInt;
	}
	else
	{
	goto advanceToNextDelimiter;
	}
	}

	// likewise, only 1 2-byte value or it'd be handled in the 4-byte check
	char* tokenCharPtr = (char*)tokenIntPtr;
	if (tokenTrailingChar)
	{
	var tokenChar = *tokenCharPtr;

	var valueChar = *curValuePtr;

	if (tokenChar == valueChar)
	{
	// we have a match, continue
	tokenCharPtr++;
	curValuePtr++;
	}
	else
	{
	goto advanceToNextDelimiter;
	}
	}

	// at this point, the full text of token has been matched, so we need to check if the _next_ char
	// in value is a delimiter or the end of value
	if (curValuePtr == endValuePtr \|\| *curValuePtr == delimiter)
	{
	return true;
	}

	// in this case, we found a string that starts with value, but is not value
	// "Foo" in "FooBar" for example


	// advance here to skip the rest of whatever's before
	// the next delimiter
	advanceToNextDelimiter:
	// our own IndexOf()-esque functionality
	// at the end of this curValuePtr points to the
	// character _after_ the next delimiter in the string
	//
	// not using IndexOf() is to save us the trouble
	// of tracking additional state, or doing extra work,
	// to move between the index-into-a-string world and
	// our constantly-advancing-pointers world
	while(true)
	{
	var curVal = *curValuePtr;
	if(curVal == delimiter)
	{
	// got one!
	curValuePtr++;

	if (curValuePtr == endValuePtr)
	{
	// but there wasn't anything after it
	return false;
	}
	break;
	}

	curValuePtr++;

	if (curValuePtr == endValuePtr)
	{
	// no more delimiters
	return false;
	}
	}

	// we only need to do this check after a delimiter is advanced
	// because this check _itself_ guarantees that we won't
	// exceed the end of value before we're done comparing
	// against token
	var remainingBytesInValue = ((byte)endValuePtr) - ((byte)curValuePtr); // we do this bytes to save an op
	if (remainingBytesInValue < tokenByteLength)
	{
	// not possible for token to fit in string, fail early
	return false;
	}
	}
	}
	}
	}