Created
July 6, 2015 11:13
-
-
Save bikrone/c86bbb6096c777ffa25e to your computer and use it in GitHub Desktop.
Test string finding performance (KMP, indexOf, Regex) in c#
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Collections.Generic; | |
using System.Linq; | |
using System.Text; | |
using System.Threading.Tasks; | |
using System.Diagnostics; | |
using System.Text.RegularExpressions; | |
namespace StringTest | |
{ | |
class Program | |
{ | |
static int[] lps = new int[10000010]; | |
static void calculateLps(string s) { | |
lps[0] = -1; | |
int n = s.Length; | |
int previousIdentical; | |
for (int i=1; i<n; i++) { | |
lps[i] = -1; | |
previousIdentical = lps[i-1]; | |
for (;;) { | |
if (s[previousIdentical+1] == s[i]) { | |
lps[i] = previousIdentical+1; | |
break; | |
} | |
if (previousIdentical == -1) break; | |
previousIdentical = lps[previousIdentical]; | |
} | |
} | |
} | |
static void Main(string[] args) | |
{ | |
string s = "Sed viderer efficiantur ea. Natum ferri prompta his id. Eirmod accusam cotidieque ut velOkay, so the codeCalling String.IndexOf() with StringComparison.Ordinal allows a more direct comparison, which is basically what my BoyerMoore class does. I tested it with a couple of variations of the class, with and without the multi-stage tables and support for case-insensitive searches but String.IndexOf() was just faster. Performance characterstics of String.IndexOf() suggest that it does not use the Boyer-Moore algorithm. Instead, what appears to be happening is that it calls into unmanaged code that likely consists of hand-optimized assembly language. As a programmer writing managed C# code, it appears that I just can't compete with tha in Listing 1 seems to work okay. Then why do I not recomend you use it? The problem is that it generally performs worse than String.IndexOf()!"; | |
for (; ; ) | |
{ | |
s += s; | |
if (s.Length > 4000000) break; | |
} | |
string pattern = "Eirmod accusam cotidieque ut velOkay"; | |
int m = pattern.Length; | |
int n = s.Length; | |
string ss = pattern + "|" + s; | |
// testing with KMP | |
var stopwatch = new Stopwatch(); | |
stopwatch.Start(); | |
int found = 0; | |
calculateLps(ss); | |
for (int i = 2 * m; i <= m + n; i++) | |
{ | |
if (lps[i] == m - 1) | |
{ | |
found++; | |
} | |
} | |
stopwatch.Stop(); | |
var elapsed_time = stopwatch.ElapsedMilliseconds; | |
Console.WriteLine("1. Found {0} match(es) in {1} miliseconds", found, elapsed_time); | |
// testing with indexOf | |
found = 0; | |
stopwatch.Restart(); | |
int index = 0; | |
while (true) | |
{ | |
index = s.IndexOf(pattern, index); | |
if (index < 0) break; | |
index += m; | |
++found; | |
} | |
stopwatch.Stop(); | |
elapsed_time = stopwatch.ElapsedMilliseconds; | |
Console.WriteLine("2. Found {0} match(es) in {1} miliseconds", found, elapsed_time); | |
// testing with regex | |
Regex reg = new Regex(pattern, RegexOptions.Compiled); | |
found = 0; | |
stopwatch.Restart(); | |
index = 0; | |
while (true) | |
{ | |
Match match = reg.Match(s, index); | |
if (!match.Success) break; | |
index = match.Index + m; | |
++found; | |
} | |
stopwatch.Stop(); | |
elapsed_time = stopwatch.ElapsedMilliseconds; | |
Console.WriteLine("2. Found {0} match(es) in {1} miliseconds", found, elapsed_time); | |
Console.ReadLine(); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment