Skip to content

Instantly share code, notes, and snippets.

@Warsaalk
Last active September 7, 2016 08:04
Show Gist options
  • Save Warsaalk/8e787cb1067eeb36467235d7a70d7e1d to your computer and use it in GitHub Desktop.
Save Warsaalk/8e787cb1067eeb36467235d7a70d7e1d to your computer and use it in GitHub Desktop.
Parse HTML to DOM
/**
 * Parse an HTML string into a DocumentFragment without using innerHTML.
 *
 * The markup is split on a tag-matching regular expression; every piece that
 * matches becomes an element (or closes one) and everything in between is
 * appended as a text node. Anything that does not match the tag pattern
 * (comments, doctypes, malformed tags, a lone "<") is kept verbatim as text.
 *
 * Known limitations:
 *  - Only `&nbsp;` is decoded; other character references stay literal.
 *  - Attribute names are limited to letters and hyphens.
 *  - No sanitisation is performed: elements and attributes are created exactly
 *    as written, so do not insert the result for untrusted input unfiltered.
 *
 * @param {string} html - Markup to parse. `null`/`undefined` is treated as an
 *     empty string; other values are converted with `String()`.
 * @returns {DocumentFragment} Fragment containing the parsed node tree.
 */
function parseHTMLtoDOM (html) {
    // Tag names must start with a letter (so "<3>" stays text) and may contain digits, including 0.
    var regexTags = /(<[a-zA-Z][a-zA-Z0-9]*(?:\s+[a-zA-Z-]+(?:=(?:".*?"|'.*?'))?)*\s*\/?>|<\/[a-zA-Z][a-zA-Z0-9]*>)/, // Match tags (opening incl. attributes & closing)
        regexTagName = /^<(\/)?([a-zA-Z][a-zA-Z0-9]*)/, // Group 1: "/" for a closing tag, group 2: tag name
        regexAttr = /(?:\s+([a-zA-Z-]+)(?:=(?:"(.*?)"|'(.*?)'))?)/g; // Group 1: name, group 2: "value", group 3: 'value'
    var singletons = ['area','base','br','col','command','embed','hr','img','input','keygen','link','meta','param','source','track','wbr']; // HTML 5 - 6/9/2016

    var source = html == null ? '' : String(html);
    // TODO: add more encoding fixes. Note: decoding &lt;/&gt; here, before the split, would turn escaped text into real tags.
    source = source.replace(/&nbsp;/g, '\u00A0');

    // split() with a single capturing group alternates text (even index) and matched tag (odd index).
    var parts = source.split(regexTags),
        fragment = document.createDocumentFragment(),
        lastElement = fragment;
    var i, part, tag, name, element, attribute, value;

    for (i = 0; i < parts.length; i++) {
        part = parts[i];
        if (part.length === 0) {
            continue;
        }

        if (i % 2 === 0) { // Text between tags
            lastElement.appendChild(document.createTextNode(part));
            continue;
        }

        tag = regexTagName.exec(part); // Always matches: the part was produced by regexTags
        name = tag[2].toLowerCase(); // Void-element lookup must not depend on the author's casing

        if (tag[1] !== undefined) { // Closing tag
            // Void elements never became the current parent, and a stray closing tag
            // at the top level has nothing to close; ignore both instead of walking off the tree.
            if (singletons.indexOf(name) === -1 && lastElement !== fragment) {
                lastElement = lastElement.parentNode;
            }
            continue;
        }

        element = document.createElement(tag[2]);
        regexAttr.lastIndex = 0; // Global regex is stateful; start every tag from the beginning
        while ((attribute = regexAttr.exec(part)) !== null) {
            // The value is in group 2 (double quotes) or group 3 (single quotes); valueless attributes get ''.
            value = attribute[2] !== undefined ? attribute[2] : attribute[3];
            element.setAttribute(attribute[1], value !== undefined ? value : '');
        }
        lastElement.appendChild(element);

        if (singletons.indexOf(name) === -1) { // Void elements can't have children
            lastElement = element;
        }
    }

    return fragment;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment