Skip to content

Instantly share code, notes, and snippets.

@mattwarren
Last active July 25, 2023 00:20
Show Gist options
  • Save mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5 to your computer and use it in GitHub Desktop.
Save mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5 to your computer and use it in GitHub Desktop.
[Config(typeof(Config))]
public class Program
{
static void Main(string[] args)
{
var summary = BenchmarkRunner.Run<Program>();
}
private class Config : ManualConfig
{
public Config()
{
Add(Job.Clr.WithLaunchCount(1));
Add(new GCDiagnoser());
}
}
[Params("Foo;Bar", "Foo;FooBar;Whatever", "Bar;blaat;foo", "blaat;foo;Bar", "foo;Bar;Blaat", "foo;FooBar;Blaat",
"Bar1;Bar2;Bar3;Bar4;Bar", "Bar1;Bar2;Bar3;Bar4;NoMatch", "Foo;FooBar;Whatever", "Some;Other;Really;Interesting;Tokens")]
public string Value { get; set; }
public string Match = "Bar";
private static char [] delimeter = new [] { ';' };
[Benchmark(Baseline = true)]
public bool StringSplit()
{
// To make is a fair comparision!
if (string.IsNullOrEmpty(Match)) return false;
if (string.IsNullOrEmpty(Value)) return false;
var tokens = Value.Split(delimeter);
foreach (var token in tokens)
{
if (token == Match)
return true;
}
return false;
}
[Benchmark]
public bool ContainsToken()
{
return ContainsToken(Value, Match);
}
[Benchmark]
public bool ContainsTokenXoofx()
{
return ContainsTokenXoofx(Value, Match);
}
[Benchmark]
public bool ContainsTokenXoofxUnsafe()
{
return ContainsTokenXoofxUnsafe(Value, Match);
}
[Benchmark]
public bool ContainsTokenHellBrick()
{
return ContainsTokenHellBrick(Value, Match);
}
[Benchmark]
public bool ContainsTokenMonty()
{
return ContainsTokenMonty(Value, Match);
}
/// <summary>
/// Code taken from https://twitter.com/Nick_Craver/status/722741298575319040
/// Checks whether the *entire* token exists, correctly handling seperators
/// (optional at start and end) so: "Foo;Bar" contains "Bar", but
/// "Foo;FooBar;Whatever" does not
/// </summary>
public static bool ContainsToken(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int lastIndex = -1, idx, endIndex = value.Length - token.Length, tokenLength = token.Length;
while ((idx = value.IndexOf(token, lastIndex + 1)) > lastIndex)
{
lastIndex = idx;
if ((idx == 0 || (value[idx - 1] == delimiter))
&& (idx == endIndex || (value[idx + tokenLength] == delimiter)))
{
return true;
}
}
return false;
}
// See https://gist.github.com/mattwarren/f0594a9f3afa9377a4bbc2bcf8e573c5#gistcomment-1757392
public static bool ContainsTokenXoofx(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int length = value.Length;
int tokenLength = token.Length;
for (int i = 0; i < length; i++)
{
for (int j = 0; j < tokenLength; j++, i++)
{
if (i >= length || value[i] != token[j])
{
goto next;
}
}
if ((i == length || (i < length && value[i] == delimiter)))
{
return true;
}
next:
i = value.IndexOf(delimiter, i);
if (i < 0)
{
break;
}
}
return false;
}
public unsafe static bool ContainsTokenXoofxUnsafe(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int length = value.Length;
int tokenLength = token.Length;
int i = 0;
fixed (char* pValue = value)
fixed (char* pToken = token)
while (true)
{
for (int j = 0; j < tokenLength; j++, i++)
{
if (i >= length || pValue[i] != pToken[j])
{
goto next;
}
}
if (i == length || pValue[i] == delimiter)
{
return true;
}
next:
i = value.IndexOf(delimiter, i) + 1;
if (i == 0)
{
break;
}
}
return false;
}
public static unsafe bool ContainsTokenHellBrick(string value, string token, char delimiter = ';')
{
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
int length = value.Length;
int tokenLength = token.Length;
int currentTokenOffset = 0;
int maxTokenOffset = length - tokenLength;
while (currentTokenOffset <= maxTokenOffset)
{
int nextDelimiterOffset = value.IndexOf(delimiter, currentTokenOffset);
if (nextDelimiterOffset < 0)
nextDelimiterOffset = length;
if (tokenLength == nextDelimiterOffset - currentTokenOffset)
{
for (int i = 0; i < tokenLength; i++)
{
if (value[currentTokenOffset + i] != token[i])
goto next;
}
return true;
}
next:
currentTokenOffset = nextDelimiterOffset + 1;
}
return false;
}
public static bool ContainsTokenMonty(string value, string token, char delimiter = ';')
{
// this code assumes you're running x64
// it _works_ regardless, but some ifdef junk for x32
// would be handy if you care about ideal x32 perf
const int charsPerLong = sizeof(long) / sizeof(char); // 4
const int charsPerInt = sizeof(int) / sizeof(char); // 2
const int bytesPerChar = sizeof(char) / sizeof(byte); // 2
if (string.IsNullOrEmpty(token)) return false;
if (string.IsNullOrEmpty(value)) return false;
var valueLength = value.Length;
var tokenLength = token.Length;
if (tokenLength > valueLength) return false;
int tokenLongs;
bool tokenTrailingInt, tokenTrailingChar;
{
var remainingChars = tokenLength;
tokenLongs = remainingChars / charsPerLong;
tokenTrailingInt = (tokenLength & 0x02) != 0;
tokenTrailingChar = (tokenLength & 0x01) != 0;
}
var tokenByteLength = tokenLength * bytesPerChar;
unsafe
{
fixed (char* valuePtr = value, tokenPtr = token)
{
var curValuePtr = valuePtr;
var endValuePtr = valuePtr + valueLength;
while (true)
{
// 8-byte steps
long* tokenLongPtr = (long*)tokenPtr;
{
for (var i = 0; i < tokenLongs; i++)
{
var tokenLong = *tokenLongPtr;
var valueLong = *((long*)curValuePtr);
if (tokenLong == valueLong)
{
// we have a match, continue
tokenLongPtr++;
curValuePtr += charsPerLong;
continue;
}
else
{
goto advanceToNextDelimiter;
}
}
}
// can only be 1 4-byte value, 'cause if there were 2 it'd have been handled in the 8-byte stride
int* tokenIntPtr = (int*)tokenLongPtr;
if (tokenTrailingInt)
{
var tokenInt = *tokenIntPtr;
var valueInt = *((int*)curValuePtr);
if (tokenInt == valueInt)
{
// we have a match, continue
tokenIntPtr++;
curValuePtr += charsPerInt;
}
else
{
goto advanceToNextDelimiter;
}
}
// likewise, only 1 2-byte value or it'd be handled in the 4-byte check
char* tokenCharPtr = (char*)tokenIntPtr;
if (tokenTrailingChar)
{
var tokenChar = *tokenCharPtr;
var valueChar = *curValuePtr;
if (tokenChar == valueChar)
{
// we have a match, continue
tokenCharPtr++;
curValuePtr++;
}
else
{
goto advanceToNextDelimiter;
}
}
// at this point, the full text of token has been matched, so we need to check if the _next_ char
// in value is a delimiter or the end of value
if (curValuePtr == endValuePtr || *curValuePtr == delimiter)
{
return true;
}
// in this case, we found a string that starts with value, but is not value
// "Foo" in "FooBar" for example
// advance here to skip the rest of whatever's before
// the next delimiter
advanceToNextDelimiter:
// our own IndexOf()-esque functionality
// at the end of this curValuePtr points to the
// character _after_ the next delimiter in the string
//
// not using IndexOf() is to save us the trouble
// of tracking additional state, or doing extra work,
// to move between the index-into-a-string world and
// our constantly-advancing-pointers world
while (true)
{
var curVal = *curValuePtr;
if (curVal == delimiter)
{
// got one!
curValuePtr++;
if (curValuePtr == endValuePtr)
{
// but there wasn't anything after it
return false;
}
break;
}
curValuePtr++;
if (curValuePtr == endValuePtr)
{
// no more delimiters
return false;
}
}
// we only need to do this check after a delimiter is advanced
// because this check _itself_ guarantees that we won't
// exceed the end of value before we're done comparing
// against token
var remainingBytesInValue = ((byte*)endValuePtr) - ((byte*)curValuePtr); // we do this bytes to save an op
if (remainingBytesInValue < tokenByteLength)
{
// not possible for token to fit in string, fail early
return false;
}
}
}
}
}
}
@kevin-montrose
Copy link

Finally in a place where I can run the actual benchmark (instead of guesstimating). Added my last attempt.

Running 64-bit makes a substantial difference - which isn't surprising, I anticipated that in the comments for my original attempt. I'm posting the more favorable run (the 64-bit one), since there seems to be a divide in the previously posted tables (@FransBouma is in 32-bit, @mattwarren is in 64-bit).

I suspect ContainsTokenMonty's performance gap will increase as value and token increase in size, since the main non-algorithmic wins are in comparing multiple chars in single ops (up to 4 chars per op when comparing token, and 2 chars per op when searching for the delimiter). I haven't tested this suspicion.

BenchmarkDotNet-Dev=v0.9.4.0+
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz, ProcessorCount=8
Frequency=2533203 ticks, Resolution=394.7572 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=64-bit RELEASE [RyuJIT]
JitModules=clrjit-v4.6.96.0

Type=Program  Mode=Throughput  Runtime=Clr  
LaunchCount=1  
Method Median StdDev Scaled Value Gen 0 Gen 1 Gen 2 Bytes Allocated/Op
ContainsToken 569.5728 ns 4.9277 ns 3.99 Bar1;Bar2;Bar3;Bar4;Bar - - - 0.06
ContainsTokenHellBrick 34.0393 ns 0.4630 ns 0.24 Bar1;Bar2;Bar3;Bar4;Bar - - - 0.00
ContainsTokenMonty 16.5055 ns 0.0676 ns 0.12 Bar1;Bar2;Bar3;Bar4;Bar - - - 0.00
ContainsTokenSingleScan 29.3718 ns 0.1476 ns 0.21 Bar1;Bar2;Bar3;Bar4;Bar - - - 0.00
ContainsTokenXoofx 37.3689 ns 0.2765 ns 0.26 Bar1;Bar2;Bar3;Bar4;Bar - - - 0.00
ContainsTokenXoofxUnsafe 32.9515 ns 0.5662 ns 0.23 Bar1;Bar2;Bar3;Bar4;Bar - - - 0.00
StringSplit 142.7695 ns 2.1492 ns 1.00 Bar1;Bar2;Bar3;Bar4;Bar 496.94 - - 154.86
ContainsToken 558.9586 ns 5.4237 ns 3.82 Bar1;Bar2;Bar3;Bar4;NoMatch - - - 0.06
ContainsTokenHellBrick 34.5302 ns 0.3591 ns 0.24 Bar1;Bar2;Bar3;Bar4;NoMatch - - - 0.00
ContainsTokenMonty 20.2167 ns 0.2271 ns 0.14 Bar1;Bar2;Bar3;Bar4;NoMatch - - - 0.00
ContainsTokenSingleScan 33.2238 ns 0.1712 ns 0.23 Bar1;Bar2;Bar3;Bar4;NoMatch - - - 0.00
ContainsTokenXoofx 36.5345 ns 0.1784 ns 0.25 Bar1;Bar2;Bar3;Bar4;NoMatch - - - 0.00
ContainsTokenXoofxUnsafe 35.2670 ns 0.7166 ns 0.24 Bar1;Bar2;Bar3;Bar4;NoMatch - - - 0.00
StringSplit 146.3651 ns 0.9008 ns 1.00 Bar1;Bar2;Bar3;Bar4;NoMatch 594.99 - - 183.93
ContainsToken 103.0207 ns 0.6338 ns 1.10 Bar;blaat;foo - - - 0.01
ContainsTokenHellBrick 11.0177 ns 0.1332 ns 0.12 Bar;blaat;foo - - - 0.00
ContainsTokenMonty 6.4689 ns 0.0406 ns 0.07 Bar;blaat;foo - - - 0.00
ContainsTokenSingleScan 6.1127 ns 0.0724 ns 0.07 Bar;blaat;foo - - - 0.00
ContainsTokenXoofx 6.1134 ns 0.0501 ns 0.07 Bar;blaat;foo - - - 0.00
ContainsTokenXoofxUnsafe 6.1312 ns 0.1400 ns 0.07 Bar;blaat;foo - - - 0.00
StringSplit 93.7291 ns 0.5546 ns 1.00 Bar;blaat;foo 277.50 - - 92.26
ContainsToken 114.1318 ns 1.2303 ns 1.57 Foo;Bar - - - 0.01
ContainsTokenHellBrick 17.0889 ns 0.1333 ns 0.24 Foo;Bar - - - 0.00
ContainsTokenMonty 8.7917 ns 0.1046 ns 0.12 Foo;Bar - - - 0.00
ContainsTokenSingleScan 11.4580 ns 0.2974 ns 0.16 Foo;Bar - - - 0.00
ContainsTokenXoofx 11.8141 ns 0.1413 ns 0.16 Foo;Bar - - - 0.00
ContainsTokenXoofxUnsafe 11.5833 ns 0.2368 ns 0.16 Foo;Bar - - - 0.00
StringSplit 72.6835 ns 0.5026 ns 1.00 Foo;Bar 193.84 - - 61.57
ContainsToken 238.9126 ns 1.7528 ns 2.25 Foo;FooBar;Whatever - - - 0.03
ContainsToken 238.1762 ns 1.4844 ns 2.24 Foo;FooBar;Whatever - - - 0.03
ContainsTokenHellBrick 21.3572 ns 0.2224 ns 0.20 Foo;FooBar;Whatever - - - 0.00
ContainsTokenHellBrick 21.2637 ns 0.1341 ns 0.20 Foo;FooBar;Whatever - - - 0.00
ContainsTokenMonty 15.8655 ns 0.0711 ns 0.15 Foo;FooBar;Whatever - - - 0.00
ContainsTokenMonty 16.0360 ns 0.4333 ns 0.15 Foo;FooBar;Whatever - - - 0.00
ContainsTokenSingleScan 20.2006 ns 0.0741 ns 0.19 Foo;FooBar;Whatever - - - 0.00
ContainsTokenSingleScan 20.2010 ns 0.0846 ns 0.19 Foo;FooBar;Whatever - - - 0.00
ContainsTokenXoofx 21.5653 ns 0.2646 ns 0.20 Foo;FooBar;Whatever - - - 0.00
ContainsTokenXoofx 21.6318 ns 0.3899 ns 0.20 Foo;FooBar;Whatever - - - 0.00
ContainsTokenXoofxUnsafe 21.3633 ns 0.1293 ns 0.20 Foo;FooBar;Whatever - - - 0.00
ContainsTokenXoofxUnsafe 20.1502 ns 0.1498 ns 0.19 Foo;FooBar;Whatever - - - 0.00
StringSplit 106.2978 ns 1.9544 ns 1.00 Foo;FooBar;Whatever 352.25 - - 114.61
StringSplit 105.5826 ns 1.3622 ns 0.99 Foo;FooBar;Whatever 352.25 - - 114.61
ContainsToken 193.5371 ns 1.4477 ns 1.20 Some;Other;Really;Interesting;Tokens - - - 0.03
ContainsTokenHellBrick 37.2887 ns 0.5070 ns 0.23 Some;Other;Really;Interesting;Tokens - - - 0.00
ContainsTokenMonty 24.8710 ns 0.4596 ns 0.15 Some;Other;Really;Interesting;Tokens - - - 0.00
ContainsTokenSingleScan 35.2233 ns 0.5154 ns 0.22 Some;Other;Really;Interesting;Tokens - - - 0.00
ContainsTokenXoofx 36.7675 ns 0.9916 ns 0.23 Some;Other;Really;Interesting;Tokens - - - 0.00
ContainsTokenXoofxUnsafe 33.4664 ns 0.4110 ns 0.21 Some;Other;Really;Interesting;Tokens - - - 0.00
StringSplit 161.6573 ns 2.5366 ns 1.00 Some;Other;Really;Interesting;Tokens 602.25 - - 185.70
ContainsToken 129.5446 ns 0.6051 ns 1.30 blaat;foo;Bar - - - 0.01
ContainsTokenHellBrick 23.4975 ns 0.3080 ns 0.24 blaat;foo;Bar - - - 0.00
ContainsTokenMonty 12.8415 ns 0.0968 ns 0.13 blaat;foo;Bar - - - 0.00
ContainsTokenSingleScan 17.0308 ns 0.1163 ns 0.17 blaat;foo;Bar - - - 0.00
ContainsTokenXoofx 18.0250 ns 0.1594 ns 0.18 blaat;foo;Bar - - - 0.00
ContainsTokenXoofxUnsafe 17.8055 ns 0.2144 ns 0.18 blaat;foo;Bar - - - 0.00
StringSplit 99.4195 ns 1.0491 ns 1.00 blaat;foo;Bar 243.52 - - 80.96
ContainsToken 114.5598 ns 0.9837 ns 1.17 foo;Bar;Blaat - - - 0.01
ContainsTokenHellBrick 17.2913 ns 0.1087 ns 0.18 foo;Bar;Blaat - - - 0.00
ContainsTokenMonty 9.0493 ns 0.0407 ns 0.09 foo;Bar;Blaat - - - 0.00
ContainsTokenSingleScan 11.2418 ns 0.1855 ns 0.12 foo;Bar;Blaat - - - 0.00
ContainsTokenXoofx 12.3910 ns 0.1124 ns 0.13 foo;Bar;Blaat - - - 0.00
ContainsTokenXoofxUnsafe 10.9651 ns 0.1700 ns 0.11 foo;Bar;Blaat - - - 0.00
StringSplit 97.6417 ns 0.9631 ns 1.00 foo;Bar;Blaat 271.19 - - 90.16
ContainsToken 232.8388 ns 0.9065 ns 2.31 foo;FooBar;Blaat - - - 0.03
ContainsTokenHellBrick 21.1562 ns 0.3597 ns 0.21 foo;FooBar;Blaat - - - 0.00
ContainsTokenMonty 14.6257 ns 0.1069 ns 0.14 foo;FooBar;Blaat - - - 0.00
ContainsTokenSingleScan 18.4191 ns 0.2883 ns 0.18 foo;FooBar;Blaat - - - 0.00
ContainsTokenXoofx 21.7312 ns 0.4207 ns 0.22 foo;FooBar;Blaat - - - 0.00
ContainsTokenXoofxUnsafe 20.1018 ns 0.1837 ns 0.20 foo;FooBar;Blaat - - - 0.00
StringSplit 100.9364 ns 0.5686 ns 1.00 foo;FooBar;Blaat 336.83 - - 110.89

@FransBouma
Copy link

FransBouma commented Apr 26, 2016

Nice! Interesting results, for sure :) Still glad I'm in second place in most tests (not all) ;).

@HellBrick
Copy link

This is epic! =)

@bbowyersmyth
Copy link

Ok I couldn't resist so here is another variation :)
https://gist.github.com/bbowyersmyth/c308aa62e871d44024c0bab552567961

Minimize the expense looking for a token as there is most likely not a match. Monty2 would probably be quicker on longer partial matches.
Use loop unrolling when looking for the next token.

BenchmarkDotNet=v0.9.4.0
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i5-6600 CPU @ 3.30GHz, ProcessorCount=4
Frequency=3234371 ticks, Resolution=309.1791 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=64-bit RELEASE [RyuJIT]
JitModules=clrjit-v4.6.1078.0

Type=ContainsToken  Mode=Throughput  Platform=X64  
Jit=RyuJit  
Method Median StdDev Value
ContainsTokenUnroll 11.1478 ns 0.3191 ns Bar1;Bar2;Bar3;Bar4;Bar
ContainsTokenMonty2 15.4258 ns 0.2308 ns Bar1;Bar2;Bar3;Bar4;Bar
ContainsTokenUnroll 12.4808 ns 2.7918 ns Bar1;Bar2;Bar3;Bar4;NoMatch
ContainsTokenMonty2 17.0302 ns 0.3017 ns Bar1;Bar2;Bar3;Bar4;NoMatch
ContainsTokenUnroll 3.9933 ns 0.1028 ns Bar;blaat;foo
ContainsTokenMonty2 5.7593 ns 0.1258 ns Bar;blaat;foo
ContainsTokenUnroll 5.7880 ns 0.1428 ns Foo;Bar
ContainsTokenMonty2 8.0731 ns 0.1841 ns Foo;Bar
ContainsTokenUnroll 11.8456 ns 0.2204 ns Foo;FooBar;Whatever
ContainsTokenMonty2 15.0076 ns 0.2669 ns Foo;FooBar;Whatever
ContainsTokenUnroll 18.5287 ns 0.2764 ns Some;Other;Really;Interesting;Tokens
ContainsTokenMonty2 21.6304 ns 0.4823 ns Some;Other;Really;Interesting;Tokens
ContainsTokenUnroll 8.7534 ns 0.1295 ns blaat;foo;Bar
ContainsTokenMonty2 11.2296 ns 0.2047 ns blaat;foo;Bar
ContainsTokenUnroll 5.9761 ns 0.1274 ns foo;Bar;Blaat
ContainsTokenMonty2 8.1325 ns 0.2437 ns foo;Bar;Blaat
ContainsTokenUnroll 10.5742 ns 0.2372 ns foo;FooBar;Blaat
ContainsTokenMonty2 14.2297 ns 0.2002 ns foo;FooBar;Blaat

@ioctlLR
Copy link

ioctlLR commented May 25, 2016

I know I'm late to the game, but here's my take. On a Core i7-4810MQ @ 2.8GHz running 4.6.1 x64 w/ RyuJIT, it appears to consistently beat the previous winners (including Monty2). Enjoy!

public static bool ContainsTokenAfward(string value, string token, char delimiter = ';')
{
    if (token == null || value == null) return false;

    var valueLength = value.Length;
    var tokenLength = token.Length;

    // if the token has any length, we only need to make sure the value is long enough to hold it
    if (tokenLength == 0 || tokenLength > valueLength) return false;

    // algorithm::
    // check the first segment to see if it matches; return true if it does
    // look for {Delimiter}{token[0]} via int* cast
    // match the remaining characters of the token if found

    // we assume we're on little-endian and that strings are terminated by null
    // we also only do ordinal comparison

    unsafe
    {
        fixed (char* valueStartPtr = value, tokenStartPtr = token)
        {
            // keep up with where we are in the value and token strings
            var valuePtr = valueStartPtr;
            var tokenPtr = tokenStartPtr;

            // keep up with the ending pointer for the value
            var valueEndPtr = valueStartPtr + valueLength;

            // see if the first segment is the token
            if (*tokenPtr == *valuePtr)
            {
                // go through at a rate of 2 chars per check...
                while (*(uint*)++tokenPtr == *(uint*)++valuePtr)
                {
                    ++tokenPtr;
                    if (++valuePtr >= valueEndPtr)
                    {
                        // we're at the end of value and it matches
                        return true;
                    }
                }
                if (*tokenPtr == '\0')
                {
                    // we matched the whole token, so make sure the next char is a delimiter
                    if (*valuePtr == delimiter)
                    {
                        return true;
                    }
                }
                // we matched all but the final character, so check it and the delimiter
                else if (*tokenPtr == *valuePtr)
                {
                    if (*++valuePtr == delimiter)
                    {
                        return true;
                    }
                }
            }

            // set our search limit (no point going further if token hasn't been found yet)
            var valueEffEndPtr = valueEndPtr - tokenLength;

            // build a matching value that is the delimiter + the first token char
            var matchVal = (int)delimiter | ((int)*tokenStartPtr << 16);

            // save off the position of the 2nd token character for later...
            var resetPtr = tokenStartPtr + 1;

            // search through the string to find the correct location
            while (valuePtr < valueEffEndPtr)
            {
                // look for the next segment delimiter where the segment starts with the correct character
                var checkSegment = true;
                while (*(int*)valuePtr != matchVal)
                {
                    ++valuePtr;
                    if (valuePtr > valueEffEndPtr)
                    {
                        checkSegment = false;
                        break;
                    }
                }

                if (checkSegment)
                {
                    // valuePtr is pointing at the delimiter, so move to the 2nd char in the segment
                    valuePtr += 2;

                    // reset our matching pointer
                    tokenPtr = resetPtr;

                    // go through at a rate of 2 chars per check...
                    while (*(int*)tokenPtr == *(int*)valuePtr)
                    {
                        tokenPtr += 2;
                        if ((valuePtr += 2) >= valueEndPtr)
                        {
                            // we're at the end of value and it matches
                            return true;
                        }
                    }
                    if (*tokenPtr == '\0')
                    {
                        // we matched the whole token, so make sure the next char is a delimiter
                        if (*valuePtr == delimiter)
                        {
                            return true;
                        }
                    }
                    // we matched all but the final character, so check it and the delimiter
                    else if (*tokenPtr == *valuePtr)
                    {
                        if (*++valuePtr == delimiter)
                        {
                            return true;
                        }
                    }
                }
            }

            // we didn't find it
            return false;
        }
    }
}

@ioctlLR
Copy link

ioctlLR commented May 25, 2016

BTW, @kevin-montrose: I found that your algorithms won't support tokens larger than 6 chars anywhere but the beginning of the list. I haven't tried to debug them, though.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment