mattwarren/ContainsTokenBenchmark.cs Secret

## ContainsTokenBenchmark.cs
[Config(typeof(Config))]
public class Program
{
    static void Main(string[] args)
    {
        var summary = BenchmarkRunner.Run<Program>();
    }

    private class Config : ManualConfig
    {
        public Config()
        {
            Add(Job.Clr.WithLaunchCount(1));
            Add(new GCDiagnoser());
        }
    }

    [Params("Foo;Bar", "Foo;FooBar;Whatever", "Bar;blaat;foo", "blaat;foo;Bar", "foo;Bar;Blaat", "foo;FooBar;Blaat",
        "Bar1;Bar2;Bar3;Bar4;Bar", "Bar1;Bar2;Bar3;Bar4;NoMatch", "Foo;FooBar;Whatever", "Some;Other;Really;Interesting;Tokens")]
    public string Value { get; set; }

    public string Match = "Bar";
    private static char [] delimeter = new [] { ';' };

    [Benchmark(Baseline = true)]
    public bool StringSplit()
    {
        // To make is a fair comparision!
        if (string.IsNullOrEmpty(Match)) return false;
        if (string.IsNullOrEmpty(Value)) return false;

        var tokens = Value.Split(delimeter);
        foreach (var token in tokens)
        {
            if (token == Match)
                return true;
        }
        return false;
    }

    [Benchmark]
    public bool ContainsToken()
    {
        return ContainsToken(Value, Match);
    }

    [Benchmark]
    public bool ContainsTokenXoofx()
    {
        return ContainsTokenXoofx(Value, Match);
    }

    [Benchmark]
    public bool ContainsTokenXoofxUnsafe()
    {
        return ContainsTokenXoofxUnsafe(Value, Match);
    }

    [Benchmark]
    public bool ContainsTokenHellBrick()
    {
        return ContainsTokenHellBrick(Value, Match);
    }

    [Benchmark]
    public bool ContainsTokenMonty()
    {
        return ContainsTokenMonty(Value, Match);
    }

    /// <summary>
    /// Code taken from https://twitter.com/Nick_Craver/status/722741298575319040
    /// Checks whether the *entire* token exists, correctly handling seperators
    /// (optional at start and end) so: "Foo;Bar" contains "Bar", but
    /// "Foo;FooBar;Whatever" does not
    /// </summary>
    public static bool ContainsToken(string value, string token, char delimiter = ';')
    {
        if (string.IsNullOrEmpty(token)) return false;
        if (string.IsNullOrEmpty(value)) return false;

        int lastIndex = -1, idx, endIndex = value.Length - token.Length, tokenLength = token.Length;
        while ((idx = value.IndexOf(token, lastIndex + 1)) > lastIndex)
        {
            lastIndex = idx;
            if ((idx == 0 || (value[idx - 1] == delimiter))
                && (idx == endIndex || (value[idx + tokenLength] == delimiter)))
            {
                return true;
            }
        }
        return false;
    }

    // See https://gist.github.com/mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5#gistcomment-1757392
    public static bool ContainsTokenXoofx(string value, string token, char delimiter = ';')
    {
        if (string.IsNullOrEmpty(token)) return false;
        if (string.IsNullOrEmpty(value)) return false;

        int length = value.Length;
        int tokenLength = token.Length;

        for (int i = 0; i < length; i++)
        {
            for (int j = 0; j < tokenLength; j++, i++)
            {
                if (i >= length || value[i] != token[j])
                {
                    goto next;
                }
            }

            if ((i == length || (i < length && value[i] == delimiter)))
            {
                return true;
            }

            next:
            i = value.IndexOf(delimiter, i);
            if (i < 0)
            {
                break;
            }
        }

        return false;
    }

    public unsafe static bool ContainsTokenXoofxUnsafe(string value, string token, char delimiter = ';')
    {
        if (string.IsNullOrEmpty(token)) return false;
        if (string.IsNullOrEmpty(value)) return false;

        int length = value.Length;
        int tokenLength = token.Length;
        int i = 0;
        fixed (char* pValue = value)
        fixed (char* pToken = token)
        while (true)
        {
            for (int j = 0; j < tokenLength; j++, i++)
            {
                if (i >= length || pValue[i] != pToken[j])
                {
                    goto next;
                }
            }

            if (i == length || pValue[i] == delimiter)
            {
                return true;
            }

            next:
            i = value.IndexOf(delimiter, i) + 1;
            if (i == 0)
            {
                break;
            }
        }

        return false;
    }

    public static unsafe bool ContainsTokenHellBrick(string value, string token, char delimiter = ';')
    {
        if (string.IsNullOrEmpty(token)) return false;
        if (string.IsNullOrEmpty(value)) return false;

        int length = value.Length;
        int tokenLength = token.Length;

        int currentTokenOffset = 0;
        int maxTokenOffset = length - tokenLength;

        while (currentTokenOffset <= maxTokenOffset)
        {
            int nextDelimiterOffset = value.IndexOf(delimiter, currentTokenOffset);
            if (nextDelimiterOffset < 0)
                nextDelimiterOffset = length;

            if (tokenLength == nextDelimiterOffset - currentTokenOffset)
            {
                for (int i = 0; i < tokenLength; i++)
                {
                    if (value[currentTokenOffset + i] != token[i])
                        goto next;
                }

                return true;
            }

            next:
            currentTokenOffset = nextDelimiterOffset + 1;
        }

        return false;
    }

    public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
    {
        // this code assumes you're running x64
        //   it _works_ regardless, but some ifdef junk for x32
        //   would be handy if you care about ideal x32 perf

        const int charsPerLong = sizeof(long) / sizeof(char);   // 4
        const int charsPerInt = sizeof(int) / sizeof(char);     // 2
        const int bytesPerChar = sizeof(char) / sizeof(byte);   // 2

        if (string.IsNullOrEmpty(token)) return false;
        if (string.IsNullOrEmpty(value)) return false;

        var valueLength = value.Length;
        var tokenLength = token.Length;

        if (tokenLength > valueLength) return false;

        int tokenLongs;
        bool tokenTrailingInt, tokenTrailingChar;
        {
            var remainingChars = tokenLength;
            tokenLongs = remainingChars / charsPerLong;
            tokenTrailingInt = (tokenLength & 0x02) != 0;
            tokenTrailingChar = (tokenLength & 0x01) != 0;
        }

        var tokenByteLength = tokenLength * bytesPerChar;

        unsafe
        {
            fixed (char* valuePtr = value, tokenPtr = token)
            {
                var curValuePtr = valuePtr;
                var endValuePtr = valuePtr + valueLength;

                while (true)
                {
                    // 8-byte steps
                    long* tokenLongPtr = (long*)tokenPtr;
                    {
                        for (var i = 0; i < tokenLongs; i++)
                        {
                            var tokenLong = *tokenLongPtr;

                            var valueLong = *((long*)curValuePtr);

                            if (tokenLong == valueLong)
                            {
                                // we have a match, continue
                                tokenLongPtr++;
                                curValuePtr += charsPerLong;
                                continue;
                            }
                            else
                            {
                                goto advanceToNextDelimiter;
                            }
                        }
                    }

                    // can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
                    int* tokenIntPtr = (int*)tokenLongPtr;
                    if (tokenTrailingInt)
                    {
                        var tokenInt = *tokenIntPtr;

                        var valueInt = *((int*)curValuePtr);

                        if (tokenInt == valueInt)
                        {
                            // we have a match, continue
                            tokenIntPtr++;
                            curValuePtr += charsPerInt;
                        }
                        else
                        {
                            goto advanceToNextDelimiter;
                        }
                    }

                    // likewise, only 1 2-byte value or it'd be handled in the 4-byte check
                    char* tokenCharPtr = (char*)tokenIntPtr;
                    if (tokenTrailingChar)
                    {
                        var tokenChar = *tokenCharPtr;

                        var valueChar = *curValuePtr;

                        if (tokenChar == valueChar)
                        {
                            // we have a match, continue
                            tokenCharPtr++;
                            curValuePtr++;
                        }
                        else
                        {
                            goto advanceToNextDelimiter;
                        }
                    }

                    // at this point, the full text of token has been matched, so we need to check if the _next_ char
                    //    in value is a delimiter or the end of value
                    if (curValuePtr == endValuePtr || *curValuePtr == delimiter)
                    {
                        return true;
                    }

                    // in this case, we found a string that starts with value, but is not value
                    //    "Foo" in "FooBar" for example


                    // advance here to skip the rest of whatever's before
                    //   the next delimiter
                    advanceToNextDelimiter:
                    // our own IndexOf()-esque functionality
                    //   at the end of this curValuePtr points to the
                    //   character _after_ the next delimiter in the string
                    //
                    // not using IndexOf() is to save us the trouble
                    //   of tracking additional state, or doing extra work,
                    //   to move between the index-into-a-string world and
                    //   our constantly-advancing-pointers world
                    while (true)
                    {
                        var curVal = *curValuePtr;
                        if (curVal == delimiter)
                        {
                            // got one!
                            curValuePtr++;

                            if (curValuePtr == endValuePtr)
                            {
                                // but there wasn't anything after it
                                return false;
                            }
                            break;
                        }

                        curValuePtr++;

                        if (curValuePtr == endValuePtr)
                        {
                            // no more delimiters
                            return false;
                        }
                    }

                    // we only need to do this check after a delimiter is advanced
                    //    because this check _itself_ guarantees that we won't
                    //    exceed the end of value before we're done comparing
                    //    against token
                    var remainingBytesInValue = ((byte*)endValuePtr) - ((byte*)curValuePtr);    // we do this bytes to save an op
                    if (remainingBytesInValue < tokenByteLength)
                    {
                        // not possible for token to fit in string, fail early
                        return false;
                    }
                }
            }
        }
    }
}
	[Config(typeof(Config))]
	public class Program
	{
	static void Main(string[] args)
	{
	var summary = BenchmarkRunner.Run<Program>();
	}

	private class Config : ManualConfig
	{
	public Config()
	{
	Add(Job.Clr.WithLaunchCount(1));
	Add(new GCDiagnoser());
	}
	}

	[Params("Foo;Bar", "Foo;FooBar;Whatever", "Bar;blaat;foo", "blaat;foo;Bar", "foo;Bar;Blaat", "foo;FooBar;Blaat",
	"Bar1;Bar2;Bar3;Bar4;Bar", "Bar1;Bar2;Bar3;Bar4;NoMatch", "Foo;FooBar;Whatever", "Some;Other;Really;Interesting;Tokens")]
	public string Value { get; set; }

	public string Match = "Bar";
	private static char [] delimeter = new [] { ';' };

	[Benchmark(Baseline = true)]
	public bool StringSplit()
	{
	// To make is a fair comparision!
	if (string.IsNullOrEmpty(Match)) return false;
	if (string.IsNullOrEmpty(Value)) return false;

	var tokens = Value.Split(delimeter);
	foreach (var token in tokens)
	{
	if (token == Match)
	return true;
	}
	return false;
	}

	[Benchmark]
	public bool ContainsToken()
	{
	return ContainsToken(Value, Match);
	}

	[Benchmark]
	public bool ContainsTokenXoofx()
	{
	return ContainsTokenXoofx(Value, Match);
	}

	[Benchmark]
	public bool ContainsTokenXoofxUnsafe()
	{
	return ContainsTokenXoofxUnsafe(Value, Match);
	}

	[Benchmark]
	public bool ContainsTokenHellBrick()
	{
	return ContainsTokenHellBrick(Value, Match);
	}

	[Benchmark]
	public bool ContainsTokenMonty()
	{
	return ContainsTokenMonty(Value, Match);
	}

	/// <summary>
	/// Code taken from https://twitter.com/Nick_Craver/status/722741298575319040
	/// Checks whether the entire token exists, correctly handling seperators
	/// (optional at start and end) so: "Foo;Bar" contains "Bar", but
	/// "Foo;FooBar;Whatever" does not
	/// </summary>
	public static bool ContainsToken(string value, string token, char delimiter = ';')
	{
	if (string.IsNullOrEmpty(token)) return false;
	if (string.IsNullOrEmpty(value)) return false;

	int lastIndex = -1, idx, endIndex = value.Length - token.Length, tokenLength = token.Length;
	while ((idx = value.IndexOf(token, lastIndex + 1)) > lastIndex)
	{
	lastIndex = idx;
	if ((idx == 0 \|\| (value[idx - 1] == delimiter))
	&& (idx == endIndex \|\| (value[idx + tokenLength] == delimiter)))
	{
	return true;
	}
	}
	return false;
	}

	// See https://gist.github.com/mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5#gistcomment-1757392
	public static bool ContainsTokenXoofx(string value, string token, char delimiter = ';')
	{
	if (string.IsNullOrEmpty(token)) return false;
	if (string.IsNullOrEmpty(value)) return false;

	int length = value.Length;
	int tokenLength = token.Length;

	for (int i = 0; i < length; i++)
	{
	for (int j = 0; j < tokenLength; j++, i++)
	{
	if (i >= length \|\| value[i] != token[j])
	{
	goto next;
	}
	}

	if ((i == length \|\| (i < length && value[i] == delimiter)))
	{
	return true;
	}

	next:
	i = value.IndexOf(delimiter, i);
	if (i < 0)
	{
	break;
	}
	}

	return false;
	}

	public unsafe static bool ContainsTokenXoofxUnsafe(string value, string token, char delimiter = ';')
	{
	if (string.IsNullOrEmpty(token)) return false;
	if (string.IsNullOrEmpty(value)) return false;

	int length = value.Length;
	int tokenLength = token.Length;
	int i = 0;
	fixed (char* pValue = value)
	fixed (char* pToken = token)
	while (true)
	{
	for (int j = 0; j < tokenLength; j++, i++)
	{
	if (i >= length \|\| pValue[i] != pToken[j])
	{
	goto next;
	}
	}

	if (i == length \|\| pValue[i] == delimiter)
	{
	return true;
	}

	next:
	i = value.IndexOf(delimiter, i) + 1;
	if (i == 0)
	{
	break;
	}
	}

	return false;
	}

	public static unsafe bool ContainsTokenHellBrick(string value, string token, char delimiter = ';')
	{
	if (string.IsNullOrEmpty(token)) return false;
	if (string.IsNullOrEmpty(value)) return false;

	int length = value.Length;
	int tokenLength = token.Length;

	int currentTokenOffset = 0;
	int maxTokenOffset = length - tokenLength;

	while (currentTokenOffset <= maxTokenOffset)
	{
	int nextDelimiterOffset = value.IndexOf(delimiter, currentTokenOffset);
	if (nextDelimiterOffset < 0)
	nextDelimiterOffset = length;

	if (tokenLength == nextDelimiterOffset - currentTokenOffset)
	{
	for (int i = 0; i < tokenLength; i++)
	{
	if (value[currentTokenOffset + i] != token[i])
	goto next;
	}

	return true;
	}

	next:
	currentTokenOffset = nextDelimiterOffset + 1;
	}

	return false;
	}

	public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
	{
	// this code assumes you're running x64
	// it _works_ regardless, but some ifdef junk for x32
	// would be handy if you care about ideal x32 perf

	const int charsPerLong = sizeof(long) / sizeof(char); // 4
	const int charsPerInt = sizeof(int) / sizeof(char); // 2
	const int bytesPerChar = sizeof(char) / sizeof(byte); // 2

	if (string.IsNullOrEmpty(token)) return false;
	if (string.IsNullOrEmpty(value)) return false;

	var valueLength = value.Length;
	var tokenLength = token.Length;

	if (tokenLength > valueLength) return false;

	int tokenLongs;
	bool tokenTrailingInt, tokenTrailingChar;
	{
	var remainingChars = tokenLength;
	tokenLongs = remainingChars / charsPerLong;
	tokenTrailingInt = (tokenLength & 0x02) != 0;
	tokenTrailingChar = (tokenLength & 0x01) != 0;
	}

	var tokenByteLength = tokenLength * bytesPerChar;

	unsafe
	{
	fixed (char* valuePtr = value, tokenPtr = token)
	{
	var curValuePtr = valuePtr;
	var endValuePtr = valuePtr + valueLength;

	while (true)
	{
	// 8-byte steps
	long* tokenLongPtr = (long*)tokenPtr;
	{
	for (var i = 0; i < tokenLongs; i++)
	{
	var tokenLong = *tokenLongPtr;

	var valueLong = ((long)curValuePtr);

	if (tokenLong == valueLong)
	{
	// we have a match, continue
	tokenLongPtr++;
	curValuePtr += charsPerLong;
	continue;
	}
	else
	{
	goto advanceToNextDelimiter;
	}
	}
	}

	// can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
	int* tokenIntPtr = (int*)tokenLongPtr;
	if (tokenTrailingInt)
	{
	var tokenInt = *tokenIntPtr;

	var valueInt = ((int)curValuePtr);

	if (tokenInt == valueInt)
	{
	// we have a match, continue
	tokenIntPtr++;
	curValuePtr += charsPerInt;
	}
	else
	{
	goto advanceToNextDelimiter;
	}
	}

	// likewise, only 1 2-byte value or it'd be handled in the 4-byte check
	char* tokenCharPtr = (char*)tokenIntPtr;
	if (tokenTrailingChar)
	{
	var tokenChar = *tokenCharPtr;

	var valueChar = *curValuePtr;

	if (tokenChar == valueChar)
	{
	// we have a match, continue
	tokenCharPtr++;
	curValuePtr++;
	}
	else
	{
	goto advanceToNextDelimiter;
	}
	}

	// at this point, the full text of token has been matched, so we need to check if the _next_ char
	// in value is a delimiter or the end of value
	if (curValuePtr == endValuePtr \|\| *curValuePtr == delimiter)
	{
	return true;
	}

	// in this case, we found a string that starts with value, but is not value
	// "Foo" in "FooBar" for example


	// advance here to skip the rest of whatever's before
	// the next delimiter
	advanceToNextDelimiter:
	// our own IndexOf()-esque functionality
	// at the end of this curValuePtr points to the
	// character _after_ the next delimiter in the string
	//
	// not using IndexOf() is to save us the trouble
	// of tracking additional state, or doing extra work,
	// to move between the index-into-a-string world and
	// our constantly-advancing-pointers world
	while (true)
	{
	var curVal = *curValuePtr;
	if (curVal == delimiter)
	{
	// got one!
	curValuePtr++;

	if (curValuePtr == endValuePtr)
	{
	// but there wasn't anything after it
	return false;
	}
	break;
	}

	curValuePtr++;

	if (curValuePtr == endValuePtr)
	{
	// no more delimiters
	return false;
	}
	}

	// we only need to do this check after a delimiter is advanced
	// because this check _itself_ guarantees that we won't
	// exceed the end of value before we're done comparing
	// against token
	var remainingBytesInValue = ((byte)endValuePtr) - ((byte)curValuePtr); // we do this bytes to save an op
	if (remainingBytesInValue < tokenByteLength)
	{
	// not possible for token to fit in string, fail early
	return false;
	}
	}
	}
	}
	}
	}