Skip to content

Instantly share code, notes, and snippets.

@tmpvar
Created June 10, 2010 01:38
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save tmpvar/432454 to your computer and use it in GitHub Desktop.
Save tmpvar/432454 to your computer and use it in GitHub Desktop.
// Sample markup that the script parses and prints when it runs.
var text = '<div><div class="container" >asd<div fuck="cakes" class="left column"><div id="date"></div><div id="address"></div></div><div class="right column"><div id="email"></div></div></div></div>';

// Module-level parser state, shared across nested parse() calls.
var intag = false;  // inside-a-tag flag
var tmpbuf = "";    // pending characters that have not been emitted yet
var endtag = false; // not referenced anywhere else in this script
var abspos = 0;     // absolute scan offset into the input string
/**
 * Parse a small, well-formed HTML fragment into a nested-array tree.
 *
 * Every element becomes an array of the form
 *   [tagName, attributes?, ...children]
 * where `attributes` is a plain object that is present only when the tag
 * carries at least one attribute, and each child is either a string (raw,
 * untrimmed text) or another element array.
 *
 * All scanning state is local to the call, so the function can be invoked
 * any number of times and never depends on leftovers from a previous run.
 *
 * Behaviour worth knowing:
 *  - Scanning stops as soon as the first top-level element has been closed;
 *    anything that follows it is ignored.
 *  - When the input consists of exactly one top-level element, that element's
 *    array is returned directly. Otherwise (e.g. text precedes the first
 *    element, or the input is empty) the list of top-level nodes is returned.
 *  - Closing tags are matched by position only; their names are not checked.
 *  - `<x ... />` is treated as an element without children.
 *  - An attribute without a value (`disabled`) maps to its own name.
 *  - Comments, doctypes, void elements written without a slash and `>` inside
 *    attribute values get no special treatment.
 *
 * @param {string} html  Markup to parse; null/undefined is treated as "".
 * @param {number} [pos] Accepted for call-site compatibility; not used.
 * @param {number} [lvl] Accepted for call-site compatibility; not used.
 * @returns {Array} The element tree described above ([] for empty input).
 */
function parse(html, pos, lvl) {
  var source = html == null ? "" : String(html);

  // name, optionally followed by = and a "double", 'single' or bare value.
  // Quoted values may contain spaces, which is why the tag is not simply
  // split on blanks.
  var attrPattern = /([^\s=]+)(?:\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"']+)))?/g;

  // Turn the text after a tag name into an attribute map (null when empty).
  function readAttributes(attrSource) {
    var attrs = {};
    var found = false;
    var match;
    var value;

    attrPattern.lastIndex = 0;
    while ((match = attrPattern.exec(attrSource)) !== null) {
      value = match[2];
      if (value === undefined) { value = match[3]; }
      if (value === undefined) { value = match[4]; }
      // Value-less attribute: fall back to the attribute's own name.
      if (value === undefined) { value = match[1]; }
      attrs[match[1]] = value;
      found = true;
    }
    return found ? attrs : null;
  }

  var root = [];          // top-level nodes
  var open = [root];      // stack of containers; the last one receives children
  var cursor = 0;
  var lt, gt, body, selfClosing, nameEnd, node, attrs;

  while (cursor < source.length) {
    lt = source.indexOf("<", cursor);
    if (lt === -1) {
      lt = source.length;
    }

    // Text between the previous tag and this one (or the end of input).
    if (lt > cursor) {
      open[open.length - 1].push(source.slice(cursor, lt));
    }
    if (lt === source.length) {
      break;
    }

    gt = source.indexOf(">", lt + 1);
    if (gt === -1) {
      break; // unterminated tag: nothing sensible left to read
    }
    cursor = gt + 1;

    body = source.slice(lt + 1, gt).trim();
    if (body === "") {
      continue; // "<>" carries no information
    }

    if (body.charAt(0) === "/") {
      // Closing tag. A stray one with nothing open is ignored.
      if (open.length > 1) {
        open.pop();
        if (open.length === 1) {
          break; // the first top-level element is complete
        }
      }
      continue;
    }

    selfClosing = body.charAt(body.length - 1) === "/";
    if (selfClosing) {
      body = body.slice(0, -1).trim();
    }

    nameEnd = body.search(/\s/);
    node = [nameEnd === -1 ? body : body.slice(0, nameEnd)];
    if (nameEnd !== -1) {
      attrs = readAttributes(body.slice(nameEnd));
      if (attrs) {
        node.push(attrs);
      }
    }

    open[open.length - 1].push(node);
    if (!selfClosing) {
      open.push(node);
    } else if (open.length === 1) {
      break; // a self-closing top-level element is complete immediately
    }
  }

  // A lone root element is returned unwrapped.
  if (root.length === 1 && Array.isArray(root[0])) {
    return root[0];
  }
  return root;
}
// Demo: run the parser over the sample markup and print what it returns.
(function printSampleTree() {
  var tree = parse(text);
  console.log(tree);
}());
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment