Skip to content

Instantly share code, notes, and snippets.

@bmeck
Created August 25, 2010 15:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bmeck/549720 to your computer and use it in GitHub Desktop.
Save bmeck/549720 to your computer and use it in GitHub Desktop.
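// USAGE
// Needs the `jsdom` and `node-htmlparser` packages.
// require('getDom.js')(stream_or_string, callback)
//   stream_or_string - an HTML string, or a stream emitting 'data' and 'end'
//   callback(error)          - called when the parser reports an error
//   callback(false, window)  - called once the document has been built
// Internally the parsed tree is replayed as SAX-style events on an
// EventEmitter (stored as `sax` on the parser's handler):
//   "element"    (name)         - an element is opened
//   "attribute"  (name, value)  - one per attribute of the current element
//   "text"       (data)         - text content
//   "elementEnd" ()             - the current element is closed
// Directives at the top of the document are skipped; no event is emitted for them.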
var EventEmitter = require("events").EventEmitter
var htmlparser = require( "node-htmlparser" )

// Emits one "attribute" event per own property of `attrs`.
// A missing attribute map (node.attribs is not always set) emits nothing.
function emitAttributes( sax, attrs ) {
  if ( !attrs ) return
  for ( var name in attrs ) {
    if ( Object.prototype.hasOwnProperty.call( attrs, name ) ) {
      sax.emit( "attribute", name, attrs[ name ] )
    }
  }
}

// Emits the opening events for a parsed node, depending on its type:
//   "tag"    -> "element" with the tag name, then its attributes
//   "script" -> "element" "script", its attributes, then its inline text
//   "style"  -> "element" "link" or "style", then its attributes
//   "text"   -> "text" with the node data
// Any other node type emits nothing.
function emitNodeStart( sax, node ) {
  var attrs = node.attribs
  switch ( node.type ) {
    case "tag":
      sax.emit( "element", node.name )
      emitAttributes( sax, attrs )
      break
    case "script":
      sax.emit( "element", "script" )
      emitAttributes( sax, attrs )
      // Inline scripts (attributes present but no src) get their first child's
      // data emitted as text; an empty script emits "".
      // NOTE(review): the traversal in the handler also descends into
      // node.children, so if children[0] is itself a "text" node its data is
      // emitted a second time - confirm whether that duplication is intended.
      if ( attrs && !attrs.src ) {
        sax.emit( "text", node.children && node.children.length ? node.children[ 0 ].data : "" )
      }
      break
    case "style":
      // A "style" node carrying an href is reported as a <link> element.
      sax.emit( "element", attrs && attrs.href ? "link" : "style" )
      emitAttributes( sax, attrs )
      break
    case "text":
      sax.emit( "text", node.data )
      break
  }
}

// True for node types that were opened with an "element" event and therefore
// need a matching "elementEnd".
function isElementNode( node ) {
  return node.type === "tag" || node.type === "script" || node.type === "style"
}

// Shared parse-complete handler. The callback relies on `this` carrying three
// properties that GetDOM assigns on `parser._handler` before parsing starts:
//   this.sax      - EventEmitter receiving element/attribute/text/elementEnd
//   this.callback - user callback, called as (error) or (false, window)
//   this.window   - window object handed back on success
// NOTE(review): this is a single module-level instance, so those properties are
// overwritten by every GetDOM call - overlapping stream parses would presumably
// clobber each other's state; verify before using concurrently.
var handler = new htmlparser.DefaultHandler( function ( error, dom ) {
  if ( error ) {
    this.callback( error )
    return
  }

  // Drop leading directives (e.g. a doctype) and text so dom[0] is the root node.
  while ( dom.length && ( dom[ 0 ].type === "directive" || dom[ 0 ].type === "text" ) ) {
    dom.shift()
  }

  // Nothing left to walk (empty input, or only directives/text): report the
  // untouched window instead of dereferencing an undefined dom[0].
  if ( !dom.length ) {
    this.callback( false, this.window )
    return
  }

  // Iterative depth-first walk of the first remaining top-level node only.
  // path[depth] is the node being visited; indices[depth] is the index of the
  // next child of that node to descend into.
  var path = [ dom[ 0 ] ]
    , indices = [ 0 ]
    , depth = 0
    , node

  while ( path.length ) {
    node = path[ depth ]

    // An index of 0 means this is the first visit: emit the opening events once.
    if ( indices[ depth ] === 0 ) {
      emitNodeStart( this.sax, node )
    }

    // Descend into the next unvisited child, if there is one.
    if ( node.children && indices[ depth ] < node.children.length ) {
      path.push( node.children[ indices[ depth ] ] )
      indices[ depth ] += 1
      depth++
      indices[ depth ] = 0
      continue
    }

    // No children left: leave this node and close it if it was an element.
    path.pop()
    indices.pop()
    depth--
    if ( isElementNode( node ) ) {
      this.sax.emit( "elementEnd" )
    }
  }

  this.callback( false, this.window )
}, { enforceEmptyTags: false } )
var jsdom = require("jsdom")
, dom = jsdom.dom.level1.core
module.exports = function GetDOM(stream_or_string,callback) {
var window = jsdom.windowAugmentation(dom);
var document = browser.document
document.defaultView = window
var selected = null
var sax = new EventEmitter
var parser = new htmlparser.Parser( handler )
, parse = function ( data ) {
parser.parseChunk( data )
}
, end = function ( data ) {
parser.parseChunk( data )
parser.done()
}
parser._handler.sax = sax
parser._handler.callback = callback
parser._handler.window = window
sax.addListener("element", function(elemName) {
var node = document.createElement(elemName)
;(selected || document).appendChild(node)
selected = node
} )
sax.addListener("attribute", function(name,value) {
selected.setAttribute(name,value)
} )
sax.addListener("text", function(text) {
if(selected) selected.appendChild(document.createTextNode(text))
} )
sax.addListener("elementEnd", function() {
selected = selected.parentNode
} )
if(typeof stream_or_string == 'string') {
end(stream_or_string)
}
else {
steam_or_string.on('data',function(data){
parse(data)
})
steam_or_string.on('end',function(){
end()
})
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment