Skip to content

Instantly share code, notes, and snippets.

@neuecc
Created January 22, 2014 16:33
Show Gist options
  • Save neuecc/8561992 to your computer and use it in GitHub Desktop.
Save neuecc/8561992 to your computer and use it in GitHub Desktop.
using Gumbo.Wrappers;
using Sgml;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Net;
using System.Xml.Linq;
namespace ConsoleApplication55
{
/// <summary>
/// Console benchmark that parses the same downloaded HTML page with SgmlReader,
/// HtmlAgilityPack and Gumbo, extracting the text of every <c>li</c> element, and
/// prints the elapsed time for each parser.
/// </summary>
class Program
{
    /// <summary>Number of times each parser callback is invoked inside the timed loop.</summary>
    const int Iterations = 100;

    /// <summary>
    /// Downloads the test page, then runs <paramref name="action"/> on it
    /// <see cref="Iterations"/> times and writes the total elapsed time to the console.
    /// </summary>
    /// <param name="label">Name printed in front of the elapsed time.</param>
    /// <param name="action">Parser routine to measure; receives the downloaded HTML.</param>
    static void Bench(string label, Action<string> action)
    {
        string html;

        // WebClient is IDisposable; release it as soon as the page has been fetched.
        // The download happens before the stopwatch starts, so it is not part of the timing.
        using (var client = new WebClient())
        {
            html = client.DownloadString("https://github.com/google/gumbo-parser");
        }

        var sw = Stopwatch.StartNew();
        for (int i = 0; i < Iterations; i++)
        {
            action(html);
        }
        sw.Stop();

        Console.WriteLine(label + ":" + sw.Elapsed);
    }

    /// <summary>
    /// Entry point: benchmarks each parser in turn on the same extraction task.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Bench("SgmlReader", html =>
        {
            XDocument xdoc;
            using (var tr = new StringReader(html))
            using (var sgml = new SgmlReader() { InputStream = tr })
            {
                xdoc = XDocument.Load(sgml);
            }

            // ToArray forces the lazy query to run so the extraction is actually measured.
            var hogehogehoge = xdoc.Descendants("li")
                .Select(x => x.Value)
                .ToArray();
        });

        Bench("HtmlAgilityPack", html =>
        {
            var doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(html);

            var hogehoge = doc.DocumentNode.Descendants("li")
                .Select(x => x.InnerText)
                .ToArray();
        });

        Bench("Gumbo", html =>
        {
            // Dispose the wrapper on every iteration so the parse tree it holds is
            // released instead of piling up across the timed loop. The query is
            // materialized with ToArray before the wrapper is disposed.
            using (var gumboParse = new Gumbo.Wrappers.GumboWrapper(html))
            {
                var hoge = gumboParse.Document.Children
                    .TraverseDepthFirst(x => x.Children) // (Ix's Expand is breadth-first, so I figured it was unusable here)
                    .OfType<ElementWrapper>()
                    .Where(x => x.NormalizedTagName == "li")
                    .Select(x => x.Value)
                    .ToArray();
            }
        });
    }
}
/// <summary>
/// Sequence helpers used by the benchmark.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Lazily flattens a forest in depth-first pre-order: each item is yielded first,
    /// followed by all of its descendants, before moving on to its next sibling.
    /// </summary>
    /// <typeparam name="T">Node type.</typeparam>
    /// <param name="source">Root items of the traversal.</param>
    /// <param name="selector">
    /// Returns the children of a node. A <c>null</c> result is treated as "no children".
    /// </param>
    /// <returns>A deferred sequence of every reachable node in pre-order.</returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="source"/> or <paramref name="selector"/> is <c>null</c>.
    /// </exception>
    public static IEnumerable<T> TraverseDepthFirst<T>(this IEnumerable<T> source, Func<T, IEnumerable<T>> selector)
    {
        // Validate here, outside the iterator, so bad arguments fail at the call site
        // rather than on the first MoveNext.
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        if (selector == null)
        {
            throw new ArgumentNullException("selector");
        }

        return TraverseDepthFirstIterator(source, selector);
    }

    /// <summary>
    /// Iterator core of <see cref="TraverseDepthFirst{T}"/>. Uses an explicit stack of
    /// enumerators instead of recursive iterators, so yielding an item does not have to
    /// bubble through one iterator per tree level and deep trees do not grow the call stack.
    /// </summary>
    private static IEnumerable<T> TraverseDepthFirstIterator<T>(IEnumerable<T> source, Func<T, IEnumerable<T>> selector)
    {
        var pending = new Stack<IEnumerator<T>>();
        pending.Push(source.GetEnumerator());

        try
        {
            while (pending.Count > 0)
            {
                var level = pending.Peek();
                if (!level.MoveNext())
                {
                    // This level is exhausted; resume the parent level.
                    pending.Pop().Dispose();
                    continue;
                }

                var item = level.Current;
                yield return item;

                // Descend into the children before visiting the next sibling.
                var children = selector(item);
                if (children != null)
                {
                    pending.Push(children.GetEnumerator());
                }
            }
        }
        finally
        {
            // Runs on early termination too (e.g. the consumer stops enumerating),
            // so every enumerator still open is disposed.
            while (pending.Count > 0)
            {
                pending.Pop().Dispose();
            }
        }
    }
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment