Munawwar/README.md

## README.md

      
    Raw
  

              README.md
            
          
    Problem: Say you have a function that takes a parent node and HTML string, which then renders the HTML as child nodes. On the next render, how would you know how much of the previous HTML was in the new HTML without having to parse the new HTML / create DOM nodes?
Another way to define the problem: I do a DOM diff for rendering HTML into a node. Think of it as a more efficient innerHTML update. However I am normally forced to parse the full HTML before even doing the diff. Even if html.startsWith() 90% of the previous HTML used. DOM creation is slower than string comparisons if and when possible.
E.g. Say the first HTML rendered was <b>1</b> and 2nd HTML rendered is <b>1</b><b>2</b>.
Clearly <b>1</b> is already in the DOM and dont need an update. It is a substring of the previous HTML. Ideally we don't even need to create the DOM for diffing purpose.
Potential solution:

First time you render, you have to parse the whole HTML string and create DOM nodes. Can't avoid the cost here.
However try to match each child node to its corresponding HTML substring. Store this is in node._sourceHTML.
On second render, try to match the top parts of HTML string with existing top node's _sourceHTML. These nodes are exact matches to the HTML substring and dont need any updates. It can be ignored/reused in the DOM diffing step.


## reduce-dom-creation.html
<!DOCTYPE html>
<body>
  <div id="liveNode"></div>
  <script>
    let lastTag = /((?:<[a-zA-Z][^\s\/>]*(?:\s+[^\s\/>"'=]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*\s*\/?\s*>)|(?:<!--.+?--!?>)|(?:<!\[CDATA\[[^>]+\]\]>))$/i;
    function parseHtml(parentNode, html) {
      let doc = document.implementation.createHTMLDocument();
      doc.write('<template>');
      let template = doc.body.firstChild;
      let existingNodes = [];

      let clearRemainingSourceHtml = false;
      let prevTextNode;
      for (let liveNode = parentNode.childNodes[0]; liveNode; liveNode = liveNode.nextSibling) {
        if (
          clearRemainingSourceHtml
          || !liveNode._sourceHTML
          || !html.startsWith((prevTextNode ? prevTextNode._sourceHTML : '') + liveNode._sourceHTML)
        ) {
          liveNode._sourceHTML = null;
          if (!clearRemainingSourceHtml) clearRemainingSourceHtml = true;
          continue;
        }
        if (prevTextNode) {
          html = html.slice(prevTextNode._sourceHTML.length);
          existingNodes.push(prevTextNode);
          prevTextNode = null;
        }
        if (liveNode.nodeType == 3) { // 3 = Node.TEXT_NODE
          prevTextNode = liveNode;
        } else {
          html = html.slice(liveNode._sourceHTML.length);
          existingNodes.push(liveNode);
        }
      }
      if (prevTextNode && html.length == prevTextNode._sourceHTML.length) {
        html = html.slice(prevTextNode._sourceHTML.length);
        existingNodes.push(prevTextNode);
      }
      console.log('reused', existingNodes.length, 'existing nodes');

      // console.log(doc);
      let lastNumberOfNodes = 0;
      let startOfLastNode = 0;
      let endOfLastNode = 0;
      // let htmlSlices = [];
      let unbalanced = false; // unbalanced HTML can cause multiple nodes being created for the same start
      let templateChildNodes = template.content.childNodes; // live list
      let buffer = '';
      // and end tags, at which point it is better to bail out of this process
      for (let index = 0; index < html.length; index++) {
        buffer += html[index];
        if (html[index] != '>' && html[index - 1] != '>' && html[index + 1] != '>') {
          endOfLastNode += 1;
          continue;
        }

        doc.write(buffer);
        buffer = '';

        let numberOfNodes = templateChildNodes.length;
        if ((numberOfNodes - lastNumberOfNodes) == 1) {
          if (startOfLastNode !== endOfLastNode) {
            let htmlSlice = html.slice(startOfLastNode, index + 1);
            let offset = 0;
            let foundEndTag = false;
            // character after an end tag's/comment's > should reach here, as a new node gets created then
            if (html[index - 1] == '>') {
              htmlSlice = htmlSlice.slice(0, -1);
              foundEndTag = true;
            } else if (html[index] == '>') { // potentially an open tag
              let match = htmlSlice.match(lastTag)?.[1];
              if (match) {
                htmlSlice = htmlSlice.slice(0, -match.length);
                offset = match.length - 1;
                foundEndTag = true;
              } else {
                // something other than an open tag, comment, cdata caused a new node to be created here.
                // e.g. with unbalanced html, a close tag could cause a new node to be created
                // with perfect balanced html / xhtml, a close tag cannot cause a new node to be created
                unbalanced = true;
                break;
              }
            }
            if (foundEndTag) {
              // htmlSlices.push(htmlSlice);
              templateChildNodes[lastNumberOfNodes - 1]._sourceHTML = htmlSlice;
              startOfLastNode = index - offset;
            }
            // else an invalid character can cause a new text node. e.g. <1
          }
          lastNumberOfNodes = numberOfNodes;
        }
        if ((numberOfNodes - lastNumberOfNodes) > 1) {
          unbalanced = true;
          break;
        }
        endOfLastNode += 1;
      }
      if (!unbalanced && startOfLastNode !== html.length && templateChildNodes.length == lastNumberOfNodes && lastNumberOfNodes) {
        let htmlSlice = html.slice(startOfLastNode);
        // htmlSlices.push(htmlSlice);
        templateChildNodes[templateChildNodes.length - 1]._sourceHTML = htmlSlice;
      } else if (startOfLastNode !== html.length) {
        doc.write(html.slice(startOfLastNode));
      }
      // console.log(htmlSlices);
      document.adoptNode(template)
      return existingNodes.concat(Array.from(template.content.childNodes));
    }


    let html1 = /* html */`
      <div class="section section-1">
        <button>
          <span>test1.1</span>
        </button>
        <span>test1.2</span>
      </div>`;
    let html2 = html1 + /* html */`
      <div class="section section-2">
        <span>test2</span>
      </div>
    `;
    // An unbalanced markup case that causes two nodes to be created when parse hits </em>.
    // We should bail out of the splitting when it detects multiple nodes being created
    // let html = /* html */`
    //   <em><p>Test</em>
    // `;
    // A case with invalid start tag, that gets converted to a text node
    // let html = `<123><<p>test`;

    let liveNode = document.getElementById('liveNode');
    var nodes = parseHtml(liveNode, html1);
    liveNode.replaceChildren(...nodes);
    // at this point all the live nodes has a _sourceHTML property

    nodes = parseHtml(liveNode, html2); // reused 2 existing nodes
    liveNode.replaceChildren(...nodes);
    nodes = parseHtml(liveNode, html2); // reused 5 existing nodes
  </script>
</body>
	<!DOCTYPE html>
	<body>
	<div id="liveNode"></div>
	<script>
	let lastTag = /((?:<[a-zA-Z][^\s\/>](?:\s+[^\s\/>"'=]+(?:\s=\s(?:(?:"[^"]")\|(?:'[^']')\|[^>\s]+))?)\s\/?\s>)\|(?:<!--.+?--!?>)\|(?:<!\[CDATA\[[^>]+\]\]>))$/i;
	function parseHtml(parentNode, html) {
	let doc = document.implementation.createHTMLDocument();
	doc.write('<template>');
	let template = doc.body.firstChild;
	let existingNodes = [];

	let clearRemainingSourceHtml = false;
	let prevTextNode;
	for (let liveNode = parentNode.childNodes[0]; liveNode; liveNode = liveNode.nextSibling) {
	if (
	clearRemainingSourceHtml
	\|\| !liveNode._sourceHTML
	\|\| !html.startsWith((prevTextNode ? prevTextNode._sourceHTML : '') + liveNode._sourceHTML)
	) {
	liveNode._sourceHTML = null;
	if (!clearRemainingSourceHtml) clearRemainingSourceHtml = true;
	continue;
	}
	if (prevTextNode) {
	html = html.slice(prevTextNode._sourceHTML.length);
	existingNodes.push(prevTextNode);
	prevTextNode = null;
	}
	if (liveNode.nodeType == 3) { // 3 = Node.TEXT_NODE
	prevTextNode = liveNode;
	} else {
	html = html.slice(liveNode._sourceHTML.length);
	existingNodes.push(liveNode);
	}
	}
	if (prevTextNode && html.length == prevTextNode._sourceHTML.length) {
	html = html.slice(prevTextNode._sourceHTML.length);
	existingNodes.push(prevTextNode);
	}
	console.log('reused', existingNodes.length, 'existing nodes');

	// console.log(doc);
	let lastNumberOfNodes = 0;
	let startOfLastNode = 0;
	let endOfLastNode = 0;
	// let htmlSlices = [];
	let unbalanced = false; // unbalanced HTML can cause multiple nodes being created for the same start
	let templateChildNodes = template.content.childNodes; // live list
	let buffer = '';
	// and end tags, at which point it is better to bail out of this process
	for (let index = 0; index < html.length; index++) {
	buffer += html[index];
	if (html[index] != '>' && html[index - 1] != '>' && html[index + 1] != '>') {
	endOfLastNode += 1;
	continue;
	}

	doc.write(buffer);
	buffer = '';

	let numberOfNodes = templateChildNodes.length;
	if ((numberOfNodes - lastNumberOfNodes) == 1) {
	if (startOfLastNode !== endOfLastNode) {
	let htmlSlice = html.slice(startOfLastNode, index + 1);
	let offset = 0;
	let foundEndTag = false;
	// character after an end tag's/comment's > should reach here, as a new node gets created then
	if (html[index - 1] == '>') {
	htmlSlice = htmlSlice.slice(0, -1);
	foundEndTag = true;
	} else if (html[index] == '>') { // potentially an open tag
	let match = htmlSlice.match(lastTag)?.[1];
	if (match) {
	htmlSlice = htmlSlice.slice(0, -match.length);
	offset = match.length - 1;
	foundEndTag = true;
	} else {
	// something other than an open tag, comment, cdata caused a new node to be created here.
	// e.g. with unbalanced html, a close tag could cause a new node to be created
	// with perfect balanced html / xhtml, a close tag cannot cause a new node to be created
	unbalanced = true;
	break;
	}
	}
	if (foundEndTag) {
	// htmlSlices.push(htmlSlice);
	templateChildNodes[lastNumberOfNodes - 1]._sourceHTML = htmlSlice;
	startOfLastNode = index - offset;
	}
	// else an invalid character can cause a new text node. e.g. <1
	}
	lastNumberOfNodes = numberOfNodes;
	}
	if ((numberOfNodes - lastNumberOfNodes) > 1) {
	unbalanced = true;
	break;
	}
	endOfLastNode += 1;
	}
	if (!unbalanced && startOfLastNode !== html.length && templateChildNodes.length == lastNumberOfNodes && lastNumberOfNodes) {
	let htmlSlice = html.slice(startOfLastNode);
	// htmlSlices.push(htmlSlice);
	templateChildNodes[templateChildNodes.length - 1]._sourceHTML = htmlSlice;
	} else if (startOfLastNode !== html.length) {
	doc.write(html.slice(startOfLastNode));
	}
	// console.log(htmlSlices);
	document.adoptNode(template)
	return existingNodes.concat(Array.from(template.content.childNodes));
	}


	let html1 = /* html */`
	<div class="section section-1">
	<button>
	<span>test1.1</span>
	</button>
	<span>test1.2</span>
	</div>`;
	let html2 = html1 + /* html */`
	<div class="section section-2">
	<span>test2</span>
	</div>
	`;
	// An unbalanced markup case that causes two nodes to be created when parse hits </em>.
	// We should bail out of the splitting when it detects multiple nodes being created
	// let html = /* html */`
	// <em><p>Test</em>
	// `;
	// A case with invalid start tag, that gets converted to a text node
	// let html = `<123><<p>test`;

	let liveNode = document.getElementById('liveNode');
	var nodes = parseHtml(liveNode, html1);
	liveNode.replaceChildren(...nodes);
	// at this point all the live nodes has a _sourceHTML property

	nodes = parseHtml(liveNode, html2); // reused 2 existing nodes
	liveNode.replaceChildren(...nodes);
	nodes = parseHtml(liveNode, html2); // reused 5 existing nodes
	</script>
	</body>