Skip to content

Instantly share code, notes, and snippets.

@alfeg
Last active March 30, 2016 16:00
Show Gist options
  • Save alfeg/f6b96064ffe801f158659d3a0b1e8193 to your computer and use it in GitHub Desktop.
Save alfeg/f6b96064ffe801f158659d3a0b1e8193 to your computer and use it in GitHub Desktop.
String split - manual vs regex
BenchmarkDotNet=v0.9.4.0
OS=Microsoft Windows NT 6.2.9200.0
Processor=Intel(R) Core(TM) i7-3630QM CPU @ 2.40GHz, ProcessorCount=8
Frequency=2338450 ticks, Resolution=427.6337 ns, Timer=TSC
HostCLR=MS.NET 4.0.30319.42000, Arch=32-bit RELEASE [AttachedDebugger]
JitModules=clrjit-v4.6.1073.0

Type=StringSplitter  Mode=Throughput  
      Method | Platform |       Jit |     Median |    StdDev |
------------ |--------- |---------- |----------- |---------- |
 StringRegex |      X64 | LegacyJit | 16.4518 us | 0.4475 us |
 StringRegex |      X64 |    RyuJit | 15.7437 us | 0.3336 us |
 StringRegex |      X86 | LegacyJit | 16.4200 us | 0.3152 us |
  StringScan |      X64 | LegacyJit |  7.0505 us | 0.1932 us |
  StringScan |      X64 |    RyuJit |  7.3239 us | 0.1604 us |
  StringScan |      X86 | LegacyJit |  7.6890 us | 0.1363 us |

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
namespace BenchITSplit
{
/// <summary>
/// Console entry point: first checks that the regex-based and the scan-based
/// splitters agree on every generated input, then runs the benchmark.
/// </summary>
class Program
{
    /// <summary>
    /// Runs the equivalence check and then hands <see cref="StringSplitter"/> to BenchmarkDotNet.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        AssertBothAlgorithmsProduceSameResult();
        BenchmarkRunner.Run<StringSplitter>();
    }

    /// <summary>
    /// Feeds every string in <see cref="StringSplitter.stringsToSplit"/> to both
    /// benchmark methods and compares the resulting parts one by one.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The two methods returned a different number of parts, or a different part,
    /// for some input. The message names the offending input (and part).
    /// </exception>
    static void AssertBothAlgorithmsProduceSameResult()
    {
        var splitter = new StringSplitter();
        int inputCount = StringSplitter.stringsToSplit.Count;

        for (int i = 0; i < inputCount; i++)
        {
            // Each benchmark method reads the input at splitter.index and then advances it,
            // so rewind before every call to make both methods see the same string.
            splitter.index = i;
            string[] viaRegex = splitter.StringRegex();
            splitter.index = i;
            string[] viaScan = splitter.StringScan();

            if (viaRegex.Length != viaScan.Length)
            {
                throw new InvalidOperationException(
                    "Input #" + i + ": regex produced " + viaRegex.Length +
                    " parts but scan produced " + viaScan.Length + ".");
            }

            for (int j = 0; j < viaRegex.Length; j++)
            {
                if (!string.Equals(viaRegex[j], viaScan[j], StringComparison.Ordinal))
                {
                    throw new InvalidOperationException(
                        "Input #" + i + ", part #" + j + " differs: regex=\"" + viaRegex[j] +
                        "\", scan=\"" + viaScan[j] + "\".");
                }
            }
        }
    }
}
/// <summary>
/// Benchmark fixture comparing two ways of splitting a string on every line feed
/// that is not directly preceded by a carriage return: a compiled regular
/// expression (<see cref="StringRegex"/>) and a manual scan (<see cref="StringScan"/>).
/// </summary>
[Config("jobs=AllJits")]
public class StringSplitter
{
    // Number of generated inputs; the benchmark methods cycle through them.
    private const int InputCount = 1000;

    /// <summary>Inputs shared by both benchmarks; filled once by the static constructor.</summary>
    public static List<string> stringsToSplit;

    /// <summary>Position in <see cref="stringsToSplit"/> of the input the next benchmark call reads.</summary>
    public int index = 0;

    // Matches a '\n' that is not immediately preceded by '\r'.
    private readonly Regex _loneLineFeed = new Regex("(?<!\r)\n", RegexOptions.Compiled);

    static StringSplitter()
    {
        // A fixed seed keeps the generated data identical from run to run.
        var seeded = new Random(1000);
        stringsToSplit = new List<string>();
        while (stringsToSplit.Count < InputCount)
        {
            // Each input is the concatenation of 70 to 99 generated pieces.
            int pieceCount = seeded.Next(70, 100);
            stringsToSplit.Add(string.Concat(GenerateRandom(seeded).Take(pieceCount)));
        }
    }

    /// <summary>
    /// Endless stream of pieces: a generated name (sometimes replaced by an empty
    /// string) followed by nothing, "\n" or "\r\n", all chosen with <paramref name="rnd"/>.
    /// </summary>
    /// <param name="rnd">Random source that drives every choice.</param>
    private static IEnumerable<string> GenerateRandom(Random rnd)
    {
        var names = new Utilities.Random.NameGenerators.MaleNameGenerator(true, true, true, true);
        const string lineFeed = "\n";
        const string crLf = "\r\n";

        for (;;)
        {
            // roll is 0..99: 0-30 gives no terminator, 31-80 gives "\n", 81-99 gives "\r\n".
            int roll = rnd.Next(0, 100);
            string terminator = roll > 80 ? crLf : (roll > 30 ? lineFeed : "");

            var piece = names.Next(rnd);
            if (rnd.Next(100) > 90)
            {
                // Occasionally drop the name so a piece can be just a terminator (or empty).
                piece = "";
            }

            yield return piece + terminator;
        }
    }

    /// <summary>Splits the next input with the manual scan.</summary>
    /// <returns>The parts of the input, in order.</returns>
    [Benchmark]
    public string[] StringScan()
    {
        return SplitByN(NextInput()).ToArray();
    }

    /// <summary>Splits the next input with the compiled regular expression.</summary>
    /// <returns>The parts of the input, in order.</returns>
    [Benchmark]
    public string[] StringRegex()
    {
        return _loneLineFeed.Split(NextInput());
    }

    // Returns the input at the current index and advances the index, wrapping to 0 after the last input.
    private string NextInput()
    {
        string current = stringsToSplit[index];
        index = index + 1 == InputCount ? 0 : index + 1;
        return current;
    }

    /// <summary>
    /// Lazily yields the pieces of <paramref name="value"/> separated by each '\n'
    /// that is not directly preceded by '\r'; the separators themselves are dropped.
    /// </summary>
    /// <param name="value">Text to split.</param>
    private IEnumerable<string> SplitByN(string value)
    {
        int pieceStart = 0;

        for (int lf = value.IndexOf('\n'); lf >= 0; lf = value.IndexOf('\n', lf + 1))
        {
            bool followsCarriageReturn = lf > 0 && value[lf - 1] == '\r';
            if (followsCarriageReturn)
            {
                // Part of a "\r\n" pair: not a separator, keep scanning.
                continue;
            }

            yield return value.Substring(pieceStart, lf - pieceStart);
            pieceStart = lf + 1;
        }

        // Whatever follows the last separator (possibly an empty string).
        yield return value.Substring(pieceStart);
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment