cowboy/jquery.ba-htmldoc.js

## readme.txt
From the jQuery API docs for .load():

jQuery uses the browser's .innerHTML property to parse the retrieved
document and insert it into the current document. During this process,
browsers often filter elements from the document such as <html>,
<title>, or <head> elements. As a result, the elements retrieved by
.load() may not be exactly the same as if the document were retrieved
directly by the browser.

Using jQuery, and given this test.html:

<!DOCTYPE HTML>
<html lang="en-US">
<head>
  <title>Test page</title>
</head>
<body>
  <div id="content">
    <p>stuff</p>
    <p>more stuff</p>
  </div>
</body>
</html>

This behavior can be seen:

$.get( 'test.html', function( html ) {
  // Not great: [, <title>Test page</title>, , <div id="content">…</div>, ]
  console.log( $(html) );

  // This fails: []
  console.log( $(html).find( '#content') );

  // This selects the content div, but.. ugly.
  console.log( $(html).filter( '#content') );

  // This also selects the content div, but.. also ugly.
  console.log( $('<div/>').html( html ).find( '#content' ) );
});

This, on the other hand, works as you'd expect, and attributes should
be properly preserved:

$.get( 'test.html', function( html ) {
  var hd = $.htmlDoc( html );

  console.log( hd.filter( 'html' ).length ); // 1
  console.log( hd.filter( 'html' ).attr( 'lang' ) ); // "en-US"
  console.log( hd.find( 'head' ).length ); // 1
  console.log( hd.find( 'body' ).length ); // 1
});

## jquery.ba-htmldoc.js
/*!
 * jQuery htmlDoc "fixer" - v0.2pre - 8/8/2011
 * http://benalman.com/projects/jquery-misc-plugins/
 *
 * Copyright (c) 2010 "Cowboy" Ben Alman
 * Dual licensed under the MIT and GPL licenses.
 * http://benalman.com/about/license/
 */

(function($) {
  // RegExp that matches opening and closing browser-stripped tags.
  // $1 = slash, $2 = tag name, $3 = attributes
  var matchTag = /<(\/?)(html|head|body|title|base|meta)(\s+[^>]*)?>/ig;
  // Matched tag names that are void elements (no content, no closing tag).
  // Not global on purpose, so .test() carries no lastIndex state between uses.
  var voidTag = /^(?:base|meta)$/i;
  // Unique id prefix for selecting placeholder elements.
  var prefix = 'hd' + +new Date;
  // A node under which a temporary DOM tree can be constructed. It is created
  // on first use and then reused by later calls.
  var parent;

  // Parse an HTML document string into a jQuery collection in which the tags
  // matched by matchTag (HTML, HEAD, BODY, TITLE, BASE, META) survive as real
  // elements, with their attributes, instead of being lost to .innerHTML
  // parsing.
  //
  // html    - (String) The HTML source to parse.
  //
  // Returns a jQuery collection holding the topmost elements of the parsed
  // document. If html contains none of the matched tags, this is simply
  // $(html).
  $.htmlDoc = function(html) {
    // A collection of "intended" elements that can't be rendered cross-browser
    // with .innerHTML, for which placeholders must be swapped.
    var elems = $();
    // Input HTML string with every matched tag replaced by a placeholder DIV
    // tag. Opening tags get a unique id so they can be found again later.
    var htmlParsed = html.replace(matchTag, function(tag, slash, name, attrs) {
      // Temporary object in which to hold attributes.
      var obj = {};
      // BASE and META never wrap content. Their placeholder has to be closed
      // right away, otherwise everything that follows would be nested inside
      // it and the next real closing tag would close the wrong placeholder.
      var isVoid = voidTag.test(name);

      // Closing tag: close the matching placeholder. A stray closing tag for
      // a void element has no open placeholder to close, so it is dropped.
      if ( slash ) {
        return isVoid ? '' : '</div>';
      }

      // Add an element of this name into the collection of elements. Note
      // that if a string of attributes is added at this point, it fails.
      elems = elems.add('<' + name + '/>');
      // If the original tag had attributes, create a temporary div with
      // those attributes. Then, copy each attribute from the temporary div
      // over to the temporary object.
      if ( attrs ) {
        $.each($('<div' + attrs + '/>')[0].attributes, function(i, attr) {
          obj[attr.name] = attr.value;
        });
      }
      // Set the attributes of the intended object based on the attributes
      // copied in the previous step.
      elems.eq(-1).attr(obj);

      // A placeholder div with a unique id replaces the intended element's
      // tag in the parsed HTML string.
      return '<div id="' + prefix + (elems.length - 1) + '">'
        + (isVoid ? '</div>' : '');
    });

    // If no placeholder elements were necessary, just return normal
    // jQuery-parsed HTML.
    if ( !elems.length ) {
      return $(html);
    }
    // Create parent node if it hasn't been created yet.
    if ( !parent ) {
      parent = $('<div/>');
    }
    // Fill the parent node with the parsed, place-held HTML (replacing
    // whatever a previous call left in it).
    parent.html(htmlParsed);
    // Replace each placeholder element with its intended element: insert the
    // intended element before the placeholder, move the placeholder's
    // contents into it, then discard the placeholder.
    $.each(elems, function(i) {
      var elem = parent.find('#' + prefix + i).before(elems[i]);
      elems.eq(i).html(elem.contents());
      elem.remove();
    });
    // Return the topmost intended element(s), sans text nodes, while removing
    // them from the parent element with unwrap.
    return parent.children().unwrap();
  };

}(jQuery));
	From the jQuery API docs for .load():

	jQuery uses the browser's .innerHTML property to parse the retrieved
	document and insert it into the current document. During this process,
	browsers often filter elements from the document such as <html>,
	<title>, or <head> elements. As a result, the elements retrieved by
	.load() may not be exactly the same as if the document were retrieved
	directly by the browser.

	Using jQuery, and given this test.html:

	<!DOCTYPE HTML>
	<html lang="en-US">
	<head>
	<title>Test page</title>
	</head>
	<body>
	<div id="content">
	<p>stuff</p>
	<p>more stuff</p>
	</div>
	</body>
	</html>

	This behavior can be seen:

	$.get( 'test.html', function( html ) {
	// Not great: [, <title>Test page</title>, , <div id="content">…</div>, ]
	console.log( $(html) );

	// This fails: []
	console.log( $(html).find( '#content') );

	// This selects the content div, but.. ugly.
	console.log( $(html).filter( '#content') );

	// This also selects the content div, but.. also ugly.
	console.log( $('<div/>').html( html ).find( '#content' ) );
	});

	This, on the other hand, works as you'd expect, and attributes should
	be properly preserved:

	$.get( 'test.html', function( html ) {
	var hd = $.htmlDoc( html );

	console.log( hd.filter( 'html' ).length ); // 1
	console.log( hd.filter( 'html' ).attr( 'lang' ) ); // "en-US"
	console.log( hd.find( 'head' ).length ); // 1
	console.log( hd.find( 'body' ).length ); // 1
	});
	/*!
	* jQuery htmlDoc "fixer" - v0.2pre - 8/8/2011
	* http://benalman.com/projects/jquery-misc-plugins/
	*
	* Copyright (c) 2010 "Cowboy" Ben Alman
	* Dual licensed under the MIT and GPL licenses.
	* http://benalman.com/about/license/
	*/

	(function($) {
	  // RegExp that matches opening and closing browser-stripped tags.
	  // $1 = slash, $2 = tag name, $3 = attributes
	  // The alternation must use bare "|" characters; an escaped "\|" would
	  // match a literal pipe and the pattern would never match a real tag.
	  var matchTag = /<(\/?)(html|head|body|title|base|meta)(\s+[^>]*)?>/ig;
	  // Matched tag names that are void elements (no content, no closing tag).
	  // Not global on purpose, so .test() carries no lastIndex state between uses.
	  var voidTag = /^(?:base|meta)$/i;
	  // Unique id prefix for selecting placeholder elements.
	  var prefix = 'hd' + +new Date;
	  // A node under which a temporary DOM tree can be constructed. It is created
	  // on first use and then reused by later calls.
	  var parent;

	  // Parse an HTML document string into a jQuery collection in which the tags
	  // matched by matchTag (HTML, HEAD, BODY, TITLE, BASE, META) survive as real
	  // elements, with their attributes, instead of being lost to .innerHTML
	  // parsing.
	  //
	  // html    - (String) The HTML source to parse.
	  //
	  // Returns a jQuery collection holding the topmost elements of the parsed
	  // document. If html contains none of the matched tags, this is simply
	  // $(html).
	  $.htmlDoc = function(html) {
	    // A collection of "intended" elements that can't be rendered cross-browser
	    // with .innerHTML, for which placeholders must be swapped.
	    var elems = $();
	    // Input HTML string with every matched tag replaced by a placeholder DIV
	    // tag. Opening tags get a unique id so they can be found again later.
	    var htmlParsed = html.replace(matchTag, function(tag, slash, name, attrs) {
	      // Temporary object in which to hold attributes.
	      var obj = {};
	      // BASE and META never wrap content. Their placeholder has to be closed
	      // right away, otherwise everything that follows would be nested inside
	      // it and the next real closing tag would close the wrong placeholder.
	      var isVoid = voidTag.test(name);

	      // Closing tag: close the matching placeholder. A stray closing tag for
	      // a void element has no open placeholder to close, so it is dropped.
	      if ( slash ) {
	        return isVoid ? '' : '</div>';
	      }

	      // Add an element of this name into the collection of elements. Note
	      // that if a string of attributes is added at this point, it fails.
	      elems = elems.add('<' + name + '/>');
	      // If the original tag had attributes, create a temporary div with
	      // those attributes. Then, copy each attribute from the temporary div
	      // over to the temporary object.
	      if ( attrs ) {
	        $.each($('<div' + attrs + '/>')[0].attributes, function(i, attr) {
	          obj[attr.name] = attr.value;
	        });
	      }
	      // Set the attributes of the intended object based on the attributes
	      // copied in the previous step.
	      elems.eq(-1).attr(obj);

	      // A placeholder div with a unique id replaces the intended element's
	      // tag in the parsed HTML string.
	      return '<div id="' + prefix + (elems.length - 1) + '">'
	        + (isVoid ? '</div>' : '');
	    });

	    // If no placeholder elements were necessary, just return normal
	    // jQuery-parsed HTML.
	    if ( !elems.length ) {
	      return $(html);
	    }
	    // Create parent node if it hasn't been created yet.
	    if ( !parent ) {
	      parent = $('<div/>');
	    }
	    // Fill the parent node with the parsed, place-held HTML (replacing
	    // whatever a previous call left in it).
	    parent.html(htmlParsed);
	    // Replace each placeholder element with its intended element: insert the
	    // intended element before the placeholder, move the placeholder's
	    // contents into it, then discard the placeholder.
	    $.each(elems, function(i) {
	      var elem = parent.find('#' + prefix + i).before(elems[i]);
	      elems.eq(i).html(elem.contents());
	      elem.remove();
	    });
	    // Return the topmost intended element(s), sans text nodes, while removing
	    // them from the parent element with unwrap.
	    return parent.children().unwrap();
	  };

	}(jQuery));