Mike Dalessio flavorjones

## _Results.txt
For an html snippet 2374 bytes long ...
                          user     system      total        real
regex * 1000          0.160000   0.010000   0.170000 (  0.182207)
nokogiri * 1000       1.440000   0.060000   1.500000 (  1.537546)
hpricot * 1000        5.740000   0.650000   6.390000 (  6.401207)

it took an average of 0.0015 seconds for Nokogiri to parse and operate on an HTML snippet 2374 bytes long
it took an average of 0.0064 seconds for Hpricot to parse and operate on an HTML snippet 2374 bytes long

For an html snippet 97517 bytes long ...

## gist:29113
require 'rubygems'
require 'nokogiri'

class Nokogiri::XML::Node
  def method_missing name, *args, &block
    if args.empty?
      list = xpath("//#{name}")
    elsif args.first.is_a? Hash
      hash = args.first
      if hash[:css]

## gist:32111
diff --git a/test/test_sanitizer.rb b/test/test_sanitizer.rb
index 22a99ee..5b8e0ba 100644
--- a/test/test_sanitizer.rb
+++ b/test/test_sanitizer.rb
@@ -38,7 +38,11 @@ class SanitizeTest < Test::Unit::TestCase
 #         xhtmloutput = htmloutput
 #         rexmloutput = "<image title='1'>foo &lt;bad&gt;bar&lt;/bad&gt; baz</image>"
       if WhiteList::VOID_ELEMENTS.include?(tag_name)
+        if Nokogiri::LIBXML_VERSION >= "2.6.16"
+          htmloutput = "<#{tag_name} title='1'/><p>foo &lt;bad&gt;bar&lt;/bad&gt; baz</p>"

## gist:43658
# proposed dryopteris API
# returned object with singleton method body()

require 'rubygems'
require 'dryopteris'

# Sample input: a complete HTML document whose body carries a <script> tag
# pointing at a remote source.
haxxored_doc = "<html><head></head><body>haxxored!<script src='http://haxxored.com'></script></body></html>"

# Pass the document through the proposed Dryopteris.sanitize entry point and
# print the result. Per the expected output below, the script tag is kept as
# entity-escaped text rather than dropped.
sanitized_doc = Dryopteris.sanitize(haxxored_doc)
puts sanitized_doc # => "<html><head></head><body>haxxored!&lt;script src="http://haxxored.com"/&gt;</body></html>"

## gist:58476
# bash function "awkp" for grabbing fields from output
# e.g., "ps -ef | fgrep mongrel | awkp 2 | xargs kill"
#
# Usage: awkp FIELD
#   FIELD is spliced in directly after a literal "$" in the awk program, so a
#   column number (2) or an awk expression such as NF both work.
# Reads stdin and prints the selected field of every line to stdout.
# Returns 1 with a usage message on stderr when FIELD is missing.

function awkp {
    if [ $# -lt 1 ]; then
        echo "usage: awkp FIELD" >&2
        return 1
    fi
    # local keeps narg out of the calling (usually interactive) shell's namespace
    local narg=$1
    # \$ stays a literal dollar sign for awk; ${narg} is expanded by the shell
    awk "{print \$${narg}}"
}

## libxml.rb
require 'ffi'

module Nokogiri
  module LibXML

    extend FFI::Library

    ffi_lib "/usr/lib/libxml2.so"

    callback :start_document_sax_func, [:pointer], :void

## leaktest.rb
#! /usr/bin/ruby
#
# memory leak?
#
require 'nokogiri'

1000.times do
  doc = Nokogiri::XML("<root><item>1</item></root>")
  doc.at('item').remove
  putc "."

## gist:79869
# in response to El_Matador, one way to search for regular expressions using Nokogiri

require 'rubygems'
require 'nokogiri'

value = Nokogiri::HTML.parse(<<-HTML_END)
  "<html>
    <body>
      <p id='para-1'>A</p>
      <p id='para-22'>B</p>

## gist:98306
#! /usr/bin/ruby

require 'rubygems'
require 'nokogiri'

# Sample markup: of the two <b> elements, only the first has text containing "http".
HTML = '<b>http://foo.com/</b><i>http://bar.com</i><b>no match here</b>'

# Parse the markup, then pick every <b> whose text() contains the substring 'http'.
doc = Nokogiri::HTML(HTML)
matching_bold = doc.xpath("//b[contains(text(),'http')]")
puts matching_bold # => <b>http://foo.com/</b>

## gist:109879
#! /usr/bin/ruby
# reproducing bug reported in http://github.com/tenderlove/nokogiri/issues/#issue/35

require 'rubygems'
require 'nokogiri'

# Iframe markup used for the reproduction: frameborder and scrolling carry
# unquoted values, and allowtransparency has no value at all.
iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" frameborder=0 scrolling=no allowtransparency></iframe>}

# Minimal host document containing a single empty paragraph.
# NOTE(review): the listing stops here, so how `iframe` is combined with `doc`
# is not visible — confirm against the full gist.
doc = Nokogiri::HTML "<html><body><p></p></body></html>"
	For an html snippet 2374 bytes long ...
	user system total real
	regex * 1000 0.160000 0.010000 0.170000 ( 0.182207)
	nokogiri * 1000 1.440000 0.060000 1.500000 ( 1.537546)
	hpricot * 1000 5.740000 0.650000 6.390000 ( 6.401207)

	it took an average of 0.0015 seconds for Nokogiri to parse and operate on an HTML snippet 2374 bytes long
	it took an average of 0.0064 seconds for Hpricot to parse and operate on an HTML snippet 2374 bytes long

	For an html snippet 97517 bytes long ...
	require 'rubygems'
	require 'nokogiri'

	class Nokogiri::XML::Node
	def method_missing name, *args, &block
	if args.empty?
	list = xpath("//#{name}")
	elsif args.first.is_a? Hash
	hash = args.first
	if hash[:css]
	diff --git a/test/test_sanitizer.rb b/test/test_sanitizer.rb
	index 22a99ee..5b8e0ba 100644
	--- a/test/test_sanitizer.rb
	+++ b/test/test_sanitizer.rb
	@@ -38,7 +38,11 @@ class SanitizeTest < Test::Unit::TestCase
	# xhtmloutput = htmloutput
	# rexmloutput = "<image title='1'>foo <bad>bar</bad> baz</image>"
	if WhiteList::VOID_ELEMENTS.include?(tag_name)
	+ if Nokogiri::LIBXML_VERSION >= "2.6.16"
	+ htmloutput = "<#{tag_name} title='1'/><p>foo <bad>bar</bad> baz</p>"
	# proposed dryopteris API
	# returned object with singleton method body()

	require 'rubygems'
	require 'dryopteris'

	# Sample input: a complete HTML document whose body carries a <script> tag
	# pointing at a remote source.
	haxxored_doc = "<html><head></head><body>haxxored!<script src='http://haxxored.com'></script></body></html>"

	# Pass the document through the proposed Dryopteris.sanitize entry point and
	# print the result; the expected output keeps the script tag as escaped text.
	sanitized_doc = Dryopteris.sanitize(haxxored_doc)
	puts sanitized_doc # => "<html><head></head><body>haxxored!&lt;script src="http://haxxored.com"/&gt;</body></html>"
	# bash function "awkp" for grabbing fields from output
	# e.g., "ps -ef | fgrep mongrel | awkp 2 | xargs kill"
	#
	# Usage: awkp FIELD
	#   FIELD is spliced in directly after a literal "$" in the awk program, so a
	#   column number (2) or an awk expression such as NF both work.
	# Reads stdin and prints the selected field of every line to stdout.
	# Returns 1 with a usage message on stderr when FIELD is missing.

	function awkp {
	    if [ $# -lt 1 ]; then
	        echo "usage: awkp FIELD" >&2
	        return 1
	    fi
	    # local keeps narg out of the calling (usually interactive) shell's namespace
	    local narg=$1
	    # \$ stays a literal dollar sign for awk; ${narg} is expanded by the shell
	    awk "{print \$${narg}}"
	}
	require 'ffi'

	module Nokogiri
	module LibXML

	extend FFI::Library

	ffi_lib "/usr/lib/libxml2.so"

	callback :start_document_sax_func, [:pointer], :void
	#! /usr/bin/ruby
	#
	# memory leak?
	#
	require 'nokogiri'

	1000.times do
	doc = Nokogiri::XML("<root><item>1</item></root>")
	doc.at('item').remove
	putc "."
	# in response to El_Matador, one way to search for regular expressions using Nokogiri

	require 'rubygems'
	require 'nokogiri'

	value = Nokogiri::HTML.parse(<<-HTML_END)
	"<html>
	<body>
	<p id='para-1'>A</p>
	<p id='para-22'>B</p>
	#! /usr/bin/ruby
	# reproducing bug reported in http://github.com/tenderlove/nokogiri/issues/#issue/35

	require 'rubygems'
	require 'nokogiri'

	# Iframe markup used for the reproduction: frameborder and scrolling carry
	# unquoted values, and allowtransparency has no value at all.
	iframe = %Q{<iframe style="width: 0%; height: 0px" src="http://someurl" frameborder=0 scrolling=no allowtransparency></iframe>}

	# Minimal host document containing a single empty paragraph.
	# NOTE(review): the listing stops here, so how `iframe` is combined with `doc`
	# is not visible — confirm against the full gist.
	doc = Nokogiri::HTML "<html><body><p></p></body></html>"