Skip to content

Instantly share code, notes, and snippets.

@arextar
Created July 26, 2011 23:23
Show Gist options
  • Save arextar/1108344 to your computer and use it in GitHub Desktop.
Save arextar/1108344 to your computer and use it in GitHub Desktop.
Simple HTML fragment parser
/**
 * Parse a small HTML snippet into a DocumentFragment.
 *
 * Supported syntax: text, opening tags with optional attributes
 * (`name='v'`, `name="v"`, or bare `name`, which is stored as `name="name"`),
 * self-closing tags (`<br/>`), and closing tags. A closing tag always pops one
 * level regardless of its name. A `<` that starts neither a recognised opening
 * tag nor a closing tag is kept as literal text.
 *
 * Results are cached per input string; every call returns a fresh deep clone,
 * so callers may freely mutate what they receive.
 *
 * The global `document` is looked up when `parse` is called, not when this
 * script is loaded.
 *
 * @param {string} str - HTML source to parse.
 * @returns {DocumentFragment} A deep clone of the parsed fragment.
 */
var parse = (function (r_start, r_attr, r_end, cache) {
  var hasOwn = Object.prototype.hasOwnProperty;

  // Copy every attribute found in `attrs` (the raw attribute section of a tag) onto `elem`.
  function setAttributes(elem, attrs) {
    var m;
    while (attrs && (m = r_attr.exec(attrs))) {
      // Drop everything up to the end of this attribute so the loop advances.
      attrs = attrs.slice(m.index + m[0].length);
      if (m[1]) {
        // Exactly one of m[2] / m[3] participates; fall back to "" so that an
        // empty quoted value (a='') is stored as "" and not "undefined".
        elem.setAttribute(m[1], m[2] || m[3] || "");
      } else {
        // Bare attribute: use its own name as the value.
        elem.setAttribute(m[4], m[4]);
      }
    }
  }

  return function (str) {
    // Remember the untouched input: it is the cache key, while `rest` is consumed below.
    var key = str == null ? "" : String(str);

    // Own-property check so keys such as "toString" do not hit Object.prototype.
    if (hasOwn.call(cache, key)) {
      return cache[key].cloneNode(true);
    }

    // `frag` is the fragment being built; `par` is the node currently receiving children.
    var frag = document.createDocumentFragment();
    var par = frag;
    var rest = key;
    var m, elem, next, literal;

    while (rest) {
      // Optional text followed by a closing tag: append the text, then go up one level.
      m = r_end.exec(rest);
      if (m) {
        rest = rest.slice(m[0].length);
        if (m[1]) {
          par.appendChild(document.createTextNode(m[1]));
        }
        par = par.parentNode || frag;
        continue;
      }

      // Optional text followed by an optional opening tag. This pattern always
      // matches, but the match may be empty.
      m = r_start.exec(rest);
      if (!m[0]) {
        // Empty match: `rest` begins with a "<" that is not a recognised tag.
        // Keep it (up to the next "<") as literal text so the loop always advances.
        next = rest.indexOf("<", 1);
        literal = next === -1 ? rest : rest.slice(0, next);
        par.appendChild(document.createTextNode(literal));
        rest = rest.slice(literal.length);
        continue;
      }

      rest = rest.slice(m[0].length);
      if (m[1]) {
        par.appendChild(document.createTextNode(m[1]));
      }
      if (m[2]) {
        elem = par.appendChild(document.createElement(m[2]));
        setAttributes(elem, m[3]);
        // Only descend into the element when the tag is not self-closing.
        if (!m[4]) {
          par = elem;
        }
      }
    }

    cache[key] = frag;
    return frag.cloneNode(true);
  };
})(
  // r_start: leading text, then an optional opening tag -> [text, tagName, attributes, "/" if self-closing]
  // NOTE(review): the attribute group nests `\w+` inside `*` with only optional
  // separators, so long malformed tags can backtrack heavily; avoid untrusted input.
  /^([^<]*)(?:<(\w+)\s*((?:\s*\w+\s*(?:=\s*(?:'[^']*'|"[^"]*"))?)*)\s*(\/?)>)?/,
  // r_attr: one attribute -> [name, single-quoted value, double-quoted value] or [,,, bareName]
  /\s*(\w+)\s*=\s*(?:'([^']*)'|"([^"]*)")|\s*(\w+)/,
  // r_end: leading text, then a closing tag -> [text, tagName]
  /^([^<]*)<\/(\w+)>/,
  // cache: input string -> parsed fragment
  {}
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment