Skip to content

Instantly share code, notes, and snippets.

Created November 25, 2011 17:05
Show Gist options
  • Save CheetahChrome/1393981 to your computer and use it in GitHub Desktop.
Save CheetahChrome/1393981 to your computer and use it in GitHub Desktop.
Regex Vs Xml
using System;
using System.Linq;
using System.IO;
using System.Xml;
using System.Diagnostics;
using System.Text.RegularExpressions;
using System.Xml.Linq;
namespace RegexVsXml
{
    /// <summary>
    /// Micro-benchmark that counts sitemap entries in a directory of XML files
    /// using three approaches (regular expression, LINQ to XML, XmlDocument)
    /// and prints the count and elapsed time of each.
    /// </summary>
    class Program
    {
        /// <summary>Directory that is scanned for input files.</summary>
        private const string XmlDirectory = @"D:\temp";

        /// <summary>Search pattern selecting the files to process.</summary>
        private const string XmlFilePattern = "*.xml";

        /// <summary>Matches an opening url element directly followed by an opening loc element.</summary>
        private const string UrlLocPattern = @"(<url>\s*<loc>)";

        /// <summary>
        /// Runs every benchmark five times, printing a "Test N" header before each round
        /// and a blank separator after it.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main( string[] args )
        {
            // A plain loop is used because IEnumerable<int> has no ForEach method
            // in the namespaces this file imports.
            foreach ( int tstNumber in Enumerable.Range( 1, 5 ) )
            {
                Console.WriteLine( "Test " + tstNumber );

                Time( "Regex", RegexFindXml );
                Time( "XmlLinq_Link_FR", XmlLinq_Link_FR );
                Time( "XmlDoc_Hasim()", XmlDoc_Hasim );

                Console.WriteLine( Environment.NewLine );
            }
        }

        /// <summary>
        /// Reads each file as text and counts the regular-expression matches of
        /// <see cref="UrlLocPattern"/>.
        /// </summary>
        /// <returns>Total number of matches across all files.</returns>
        public static int RegexFindXml()
        {
            return Directory.EnumerateFiles( XmlDirectory, XmlFilePattern )
                            .Sum( fl => Regex.Matches( File.ReadAllText( fl ), UrlLocPattern ).Count );
        }

        /// <summary>
        /// Loads each file with LINQ to XML and counts the descendant elements named "loc".
        /// </summary>
        /// <returns>Total number of "loc" descendants across all files.</returns>
        public static int XmlLinq_Link_FR()
        {
            // NOTE(review): the namespace is empty, so only un-namespaced "loc" elements match.
            // If the input files declare a default namespace it must be set here - confirm
            // against the real data.
            XNamespace xn = "";

            return Directory.EnumerateFiles( XmlDirectory, XmlFilePattern )
                            .Sum( fl => XElement.Load( fl ).Descendants( xn + "loc" ).Count() );
        }

        /// <summary>
        /// Parses each file into an <see cref="XmlDocument"/> and counts the child nodes
        /// of its root element.
        /// </summary>
        /// <returns>Total number of root-level child nodes across all files.</returns>
        public static int XmlDoc_Hasim()
        {
            return Directory.EnumerateFiles( XmlDirectory, XmlFilePattern )
                            .Sum( fl =>
                            {
                                XmlDocument doc = new XmlDocument();
                                doc.LoadXml( File.ReadAllText( fl ) );

                                // Use the root element directly instead of assuming it is the
                                // second child node: that assumption fails when the file has no
                                // XML declaration or has a comment/doctype before the root.
                                XmlElement root = doc.DocumentElement;
                                return root == null ? 0 : root.ChildNodes.Count;
                            } );
        }

        /// <summary>
        /// Invokes <paramref name="work"/> once and writes its result and elapsed time to the console.
        /// </summary>
        /// <typeparam name="T">Type of the value produced by <paramref name="work"/>.</typeparam>
        /// <param name="what">Label printed for this measurement.</param>
        /// <param name="work">Delegate to execute and time.</param>
        public static void Time<T>( string what, Func<T> work )
        {
            var sw = Stopwatch.StartNew();
            var result = work();

            // Stop before printing so console formatting is not part of the measurement.
            sw.Stop();

            Console.WriteLine( "\t{0,-15} found {1} urls in {2}", what, result, sw.Elapsed );
        }
    }
}
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="">
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="">
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment