Last active
March 11, 2021 02:07
-
-
Save yallie/5008959 to your computer and use it in GitHub Desktop.
IndexOfAny for strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Linq; | |
struct Program | |
{ | |
static void Main() | |
{ | |
// benchmark | |
const int iterations = 1000000; | |
var sourceCode = File.ReadAllText("test.cs"); // "<< < re for /*><*/foreach return < >> > >>"; | |
var prefixes = new[] { "<<", ">>", "<", ">", "/*", "*/", "for", "foreach", "return" }; | |
// pre-setup for #1 | |
var strings = prefixes.OrderByDescending(s => s.Length).ToLookup(s => s[0]); | |
var chars = strings.Select(g => g.Key).ToArray(); | |
Validate("#1", sourceCode, pos => sourceCode.IndexOfAny(pos, chars, strings)); | |
Benchmark("#1", iterations, pos => sourceCode.IndexOfAny(pos, chars, strings)); | |
// pre-setup for #2 | |
var index = RootIndex.FromStrings(prefixes); | |
Validate("#2", sourceCode, pos => sourceCode.IndexOfAny(pos, index)); | |
Benchmark("#2", iterations, pos => sourceCode.IndexOfAny(pos, index)); | |
Console.ReadKey(); | |
} | |
static void Validate(string title, string sourceCode, Func<int, Tuple<int, int>> code) | |
{ | |
var result = new List<Tuple<int, int>>(); | |
var position = 0; | |
while (true) | |
{ | |
var r = code(position); | |
if (r.Item1 < 0) | |
{ | |
break; | |
} | |
result.Add(r); | |
position = r.Item1 + r.Item2; | |
} | |
Func<Tuple<int, int>, string> transform = i => | |
sourceCode.Substring(i.Item1, i.Item2) + " 0x" + i.Item1.ToString("X"); | |
Console.WriteLine("----------------------------"); | |
Console.WriteLine("{1} Result: {0}", string.Join(", ", result.Select(transform)), title); | |
} | |
static void Benchmark(string title, int iterations, Func<int, Tuple<int, int>> code) | |
{ | |
var position = 0; | |
var sw = Stopwatch.StartNew(); | |
for (var i = 0; i < iterations; i++) | |
{ | |
var r = code(position); | |
if (r.Item1 < 0) | |
{ | |
position = 0; | |
} | |
} | |
sw.Stop(); | |
Console.WriteLine("{2} Time elapsed: {0}, single iteration: {1} ms", sw.Elapsed, sw.Elapsed.TotalMilliseconds / iterations, title); | |
} | |
} | |
public static class StringExtensions | |
{ | |
// version #1, dead simple | |
public static Tuple<int, int> IndexOfAny(this string sourceCode, int position, char[] chars, ILookup<char, string> strings) | |
{ | |
while (position < sourceCode.Length) | |
{ | |
var index = sourceCode.IndexOfAny(chars, position); | |
if (index < 0) | |
{ | |
return Tuple.Create(index, 0); | |
} | |
var @char = sourceCode[index]; | |
foreach (var s in strings[@char]) | |
{ | |
if (string.CompareOrdinal(sourceCode, index + 1, s, 1, s.Length - 1) == 0) | |
{ | |
return Tuple.Create(index, s.Length); | |
} | |
} | |
position = index + 1; | |
} | |
return Tuple.Create(-1, 0); | |
} | |
// version #2, ugly and optimized | |
public static Tuple<int, int> IndexOfAny(this string sourceCode, int position, RootIndex rootIndex) | |
{ | |
while (position < sourceCode.Length) | |
{ | |
var index = sourceCode.IndexOfAny(rootIndex.Chars, position); | |
if (index < 0) | |
{ | |
return Tuple.Create(index, 0); | |
} | |
// only one character is available | |
var @char = sourceCode[index]; | |
if (sourceCode.Length == index + 1) | |
{ | |
if (rootIndex.SingleChars.Contains(@char)) | |
{ | |
return Tuple.Create(index, 1); | |
} | |
return Tuple.Create(-1, 0); | |
} | |
// only two characters available | |
var leafIndex = rootIndex.MultipleChars[@char]; | |
var nextChar = sourceCode[index + 1]; | |
if (sourceCode.Length == index + 2) | |
{ | |
if (leafIndex.SingleChars.Contains(nextChar)) | |
{ | |
return Tuple.Create(index, 2); | |
} | |
if (rootIndex.Chars.Contains(nextChar)) | |
{ | |
return Tuple.Create(index + 1, 1); | |
} | |
return Tuple.Create(-1, 0); | |
} | |
// several characters are available | |
if (leafIndex.Chars.Contains(nextChar)) | |
{ | |
foreach (var s in leafIndex.Strings[nextChar]) | |
{ | |
if (string.CompareOrdinal(sourceCode, index + 2, s, 0, s.Length) == 0) | |
{ | |
return Tuple.Create(index, s.Length + 2); | |
} | |
} | |
} | |
if (leafIndex.SingleChars.Contains(nextChar)) | |
{ | |
return Tuple.Create(index, 2); | |
} | |
if (rootIndex.SingleChars.Contains(@char)) | |
{ | |
return Tuple.Create(index, 1); | |
} | |
position = index + 1; | |
} | |
return Tuple.Create(-1, 0); | |
} | |
} | |
// helper structures for version #2 | |
public class RootIndex | |
{ | |
public char[] Chars; | |
public string SingleChars; | |
public IDictionary<char, LeafIndex> MultipleChars; | |
public static RootIndex FromStrings(params string[] strings) | |
{ | |
var idx = strings | |
.Where(s => !string.IsNullOrEmpty(s)) | |
.ToLookup(s => s[0], s => s.Substring(1)) | |
.ToDictionary(g => g.Key, g => LeafIndex.FromStrings(g)); | |
return new RootIndex | |
{ | |
MultipleChars = idx, | |
Chars = idx.Keys.ToArray(), | |
SingleChars = new string(strings.Where(s => s != null && s.Length == 1).Select(s => s[0]).ToArray()) | |
}; | |
} | |
} | |
public class LeafIndex | |
{ | |
public string Chars; | |
public string SingleChars; | |
public Dictionary<char, string[]> Strings; | |
public static LeafIndex FromStrings(IEnumerable<string> strings) | |
{ | |
var str = strings | |
.Where(s => !string.IsNullOrEmpty(s)) | |
.ToLookup(s => s[0], s => s.Substring(1)) | |
.ToDictionary(g => g.Key, g => g.OrderByDescending(s => s.Length).Where(s => !string.IsNullOrEmpty(s)).ToArray()); | |
return new LeafIndex | |
{ | |
Strings = str, | |
Chars = new string(str.Keys.ToArray()), | |
SingleChars = new string(strings.Where(s => s != null && s.Length == 1).Select(s => s[0]).ToArray()) | |
}; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment