Skip to content

Instantly share code, notes, and snippets.

@yallie
Last active March 11, 2021 02:07
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yallie/5008959 to your computer and use it in GitHub Desktop.
Save yallie/5008959 to your computer and use it in GitHub Desktop.
IndexOfAny for strings
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
struct Program
{
static void Main()
{
// benchmark
const int iterations = 1000000;
var sourceCode = File.ReadAllText("test.cs"); // "<< < re for /*><*/foreach return < >> > >>";
var prefixes = new[] { "<<", ">>", "<", ">", "/*", "*/", "for", "foreach", "return" };
// pre-setup for #1
var strings = prefixes.OrderByDescending(s => s.Length).ToLookup(s => s[0]);
var chars = strings.Select(g => g.Key).ToArray();
Validate("#1", sourceCode, pos => sourceCode.IndexOfAny(pos, chars, strings));
Benchmark("#1", iterations, pos => sourceCode.IndexOfAny(pos, chars, strings));
// pre-setup for #2
var index = RootIndex.FromStrings(prefixes);
Validate("#2", sourceCode, pos => sourceCode.IndexOfAny(pos, index));
Benchmark("#2", iterations, pos => sourceCode.IndexOfAny(pos, index));
Console.ReadKey();
}
static void Validate(string title, string sourceCode, Func<int, Tuple<int, int>> code)
{
var result = new List<Tuple<int, int>>();
var position = 0;
while (true)
{
var r = code(position);
if (r.Item1 < 0)
{
break;
}
result.Add(r);
position = r.Item1 + r.Item2;
}
Func<Tuple<int, int>, string> transform = i =>
sourceCode.Substring(i.Item1, i.Item2) + " 0x" + i.Item1.ToString("X");
Console.WriteLine("----------------------------");
Console.WriteLine("{1} Result: {0}", string.Join(", ", result.Select(transform)), title);
}
static void Benchmark(string title, int iterations, Func<int, Tuple<int, int>> code)
{
var position = 0;
var sw = Stopwatch.StartNew();
for (var i = 0; i < iterations; i++)
{
var r = code(position);
if (r.Item1 < 0)
{
position = 0;
}
}
sw.Stop();
Console.WriteLine("{2} Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations, title);
}
}
public static class StringExtensions
{
// version #1, dead simple
public static Tuple<int, int> IndexOfAny(this string sourceCode, int position, char[] chars, ILookup<char, string> strings)
{
while (position < sourceCode.Length)
{
var index = sourceCode.IndexOfAny(chars, position);
if (index < 0)
{
return Tuple.Create(index, 0);
}
var @char = sourceCode[index];
foreach (var s in strings[@char])
{
if (string.CompareOrdinal(sourceCode, index + 1, s, 1, s.Length - 1) == 0)
{
return Tuple.Create(index, s.Length);
}
}
position = index + 1;
}
return Tuple.Create(-1, 0);
}
// version #2, ugly and optimized
public static Tuple<int, int> IndexOfAny(this string sourceCode, int position, RootIndex rootIndex)
{
while (position < sourceCode.Length)
{
var index = sourceCode.IndexOfAny(rootIndex.Chars, position);
if (index < 0)
{
return Tuple.Create(index, 0);
}
// only one character is available
var @char = sourceCode[index];
if (sourceCode.Length == index + 1)
{
if (rootIndex.SingleChars.Contains(@char))
{
return Tuple.Create(index, 1);
}
return Tuple.Create(-1, 0);
}
// only two characters available
var leafIndex = rootIndex.MultipleChars[@char];
var nextChar = sourceCode[index + 1];
if (sourceCode.Length == index + 2)
{
if (leafIndex.SingleChars.Contains(nextChar))
{
return Tuple.Create(index, 2);
}
if (rootIndex.Chars.Contains(nextChar))
{
return Tuple.Create(index + 1, 1);
}
return Tuple.Create(-1, 0);
}
// several characters are available
if (leafIndex.Chars.Contains(nextChar))
{
foreach (var s in leafIndex.Strings[nextChar])
{
if (string.CompareOrdinal(sourceCode, index + 2, s, 0, s.Length) == 0)
{
return Tuple.Create(index, s.Length + 2);
}
}
}
if (leafIndex.SingleChars.Contains(nextChar))
{
return Tuple.Create(index, 2);
}
if (rootIndex.SingleChars.Contains(@char))
{
return Tuple.Create(index, 1);
}
position = index + 1;
}
return Tuple.Create(-1, 0);
}
}
// helper structures for version #2
public class RootIndex
{
public char[] Chars;
public string SingleChars;
public IDictionary<char, LeafIndex> MultipleChars;
public static RootIndex FromStrings(params string[] strings)
{
var idx = strings
.Where(s => !string.IsNullOrEmpty(s))
.ToLookup(s => s[0], s => s.Substring(1))
.ToDictionary(g => g.Key, g => LeafIndex.FromStrings(g));
return new RootIndex
{
MultipleChars = idx,
Chars = idx.Keys.ToArray(),
SingleChars = new string(strings.Where(s => s != null && s.Length == 1).Select(s => s[0]).ToArray())
};
}
}
public class LeafIndex
{
public string Chars;
public string SingleChars;
public Dictionary<char, string[]> Strings;
public static LeafIndex FromStrings(IEnumerable<string> strings)
{
var str = strings
.Where(s => !string.IsNullOrEmpty(s))
.ToLookup(s => s[0], s => s.Substring(1))
.ToDictionary(g => g.Key, g => g.OrderByDescending(s => s.Length).Where(s => !string.IsNullOrEmpty(s)).ToArray());
return new LeafIndex
{
Strings = str,
Chars = new string(str.Keys.ToArray()),
SingleChars = new string(strings.Where(s => s != null && s.Length == 1).Select(s => s[0]).ToArray())
};
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment