Skip to content

Instantly share code, notes, and snippets.

@bikrone
Created July 6, 2015 11:13
Show Gist options
  • Save bikrone/c86bbb6096c777ffa25e to your computer and use it in GitHub Desktop.
Save bikrone/c86bbb6096c777ffa25e to your computer and use it in GitHub Desktop.
Test string finding performance (KMP, indexOf, Regex) in c#
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Diagnostics;
using System.Text.RegularExpressions;
namespace StringTest
{
class Program
{
static int[] lps = new int[10000010];
static void calculateLps(string s) {
lps[0] = -1;
int n = s.Length;
int previousIdentical;
for (int i=1; i<n; i++) {
lps[i] = -1;
previousIdentical = lps[i-1];
for (;;) {
if (s[previousIdentical+1] == s[i]) {
lps[i] = previousIdentical+1;
break;
}
if (previousIdentical == -1) break;
previousIdentical = lps[previousIdentical];
}
}
}
static void Main(string[] args)
{
string s = "Sed viderer efficiantur ea. Natum ferri prompta his id. Eirmod accusam cotidieque ut velOkay, so the codeCalling String.IndexOf() with StringComparison.Ordinal allows a more direct comparison, which is basically what my BoyerMoore class does. I tested it with a couple of variations of the class, with and without the multi-stage tables and support for case-insensitive searches but String.IndexOf() was just faster. Performance characterstics of String.IndexOf() suggest that it does not use the Boyer-Moore algorithm. Instead, what appears to be happening is that it calls into unmanaged code that likely consists of hand-optimized assembly language. As a programmer writing managed C# code, it appears that I just can't compete with tha in Listing 1 seems to work okay. Then why do I not recomend you use it? The problem is that it generally performs worse than String.IndexOf()!";
for (; ; )
{
s += s;
if (s.Length > 4000000) break;
}
string pattern = "Eirmod accusam cotidieque ut velOkay";
int m = pattern.Length;
int n = s.Length;
string ss = pattern + "|" + s;
// testing with KMP
var stopwatch = new Stopwatch();
stopwatch.Start();
int found = 0;
calculateLps(ss);
for (int i = 2 * m; i <= m + n; i++)
{
if (lps[i] == m - 1)
{
found++;
}
}
stopwatch.Stop();
var elapsed_time = stopwatch.ElapsedMilliseconds;
Console.WriteLine("1. Found {0} match(es) in {1} miliseconds", found, elapsed_time);
// testing with indexOf
found = 0;
stopwatch.Restart();
int index = 0;
while (true)
{
index = s.IndexOf(pattern, index);
if (index < 0) break;
index += m;
++found;
}
stopwatch.Stop();
elapsed_time = stopwatch.ElapsedMilliseconds;
Console.WriteLine("2. Found {0} match(es) in {1} miliseconds", found, elapsed_time);
// testing with regex
Regex reg = new Regex(pattern, RegexOptions.Compiled);
found = 0;
stopwatch.Restart();
index = 0;
while (true)
{
Match match = reg.Match(s, index);
if (!match.Success) break;
index = match.Index + m;
++found;
}
stopwatch.Stop();
elapsed_time = stopwatch.ElapsedMilliseconds;
Console.WriteLine("2. Found {0} match(es) in {1} miliseconds", found, elapsed_time);
Console.ReadLine();
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment