MadeByMike/highlight-words-in-html.js

## highlight-words-in-html.js
/*!
* Highlight words in html - v0.0.1 - 2015-04-29
* http://madebymike.com.au
* Copyright (c) 2015 Mike Riethmuller; Licensed MIT
*/

/*
* It turns out that annotating a phrase within HTML is exceptionally hard. I've done it so you don't have to.
* The function accepts a phrase (string), HTML (string), and optionally a classname (string)
* It returns the same HTML with <mark> elements surrounding the phrase
* Phrases can be separated by inline elements such as <strong> or <em> or can be split across block elements
* Ignores line breaks, images and empty elements. All that matters is that the words are in order.
*/

/**
 * Wrap the first occurrence of a phrase inside an HTML string in <mark> elements.
 *
 * The HTML is parsed into a detached <div>. The values of all text nodes that
 * are not whitespace-only are concatenated in document order and searched for
 * the phrase (case-sensitive, literal match). Whitespace-only text nodes are
 * ignored for matching but are left in the output. Every text node that
 * overlaps the match gets its overlapping part wrapped in its own <mark>, so a
 * phrase may span inline or block elements.
 *
 * NOTE: `source` is assigned to innerHTML, so pass only trusted or sanitized markup.
 *
 * @param {string} phrase - Text to highlight.
 * @param {string} source - HTML to search.
 * @param {string} [classname] - Class applied to every <mark>; defaults to "".
 * @returns {string|boolean} The re-serialized HTML with <mark> elements added,
 *   or false when the phrase does not occur.
 */
function highlightWordsInHTML(phrase, source, classname){
  var DOM = document.createElement("div");
  DOM.innerHTML = source;

  // Non-blank text nodes in document order, each with its offset in `combined`.
  var textNodes = [];
  var combined = "";

  // Depth-first walk over every descendant of DOM.
  var node = DOM.firstChild;
  while (node) {
    if (node.nodeType === 3 && node.nodeValue.trim() !== "") {
      textNodes.push({ node: node, start: combined.length });
      combined += node.nodeValue;
    }

    if (node.firstChild) {
      node = node.firstChild;
    } else {
      // Climb until there is a following sibling or we are back at the root.
      while (node !== DOM && !node.nextSibling) {
        node = node.parentNode;
      }
      node = (node === DOM) ? null : node.nextSibling;
    }
  }

  // Plain string search: no regular expression is built from the phrase, so
  // characters such as "+", "(" or "?" are matched literally.
  var matchStart = combined.indexOf(phrase);
  if (matchStart === -1) {
    console.log('phrase could not be found');
    return false;
  }
  var matchEnd = matchStart + phrase.length;

  // Replace each overlapping text node with: [text before] <mark> [text after].
  textNodes.forEach(function(entry){
    var text = entry.node.nodeValue;
    var from = Math.max(matchStart, entry.start) - entry.start;
    var to = Math.min(matchEnd, entry.start + text.length) - entry.start;

    if (from >= to) {
      return; // This node lies entirely outside the matched range
    }

    var fragment = document.createDocumentFragment();
    var mark = document.createElement("mark");
    mark.className = classname || "";
    mark.appendChild(document.createTextNode(text.substring(from, to)));

    if (from > 0) {
      fragment.appendChild(document.createTextNode(text.substring(0, from)));
    }
    fragment.appendChild(mark);
    if (to < text.length) {
      fragment.appendChild(document.createTextNode(text.substring(to)));
    }

    entry.node.parentNode.replaceChild(fragment, entry.node);
  });

  return DOM.innerHTML;

}
	/*!
	* Highlight words in html - v0.0.1 - 2015-04-29
	* http://madebymike.com.au
	* Copyright (c) 2015 Mike Riethmuller; Licensed MIT
	*/

	/*
	* It turns out that annotating a phrase within HTML is exceptionally hard. I've done it so you don't have to.
	* The function accepts a phrase (string), HTML (string), and optionally a classname (string)
	* It returns the same HTML with <mark> elements surrounding the phrase
	* Phrases can be separated by inline elements such as <strong> or <em> or can be split across block elements
	* Ignores line breaks, images and empty elements. All that matters is that the words are in order.
	*/

	/**
	 * Wrap the first occurrence of a phrase inside an HTML string in <mark> elements.
	 *
	 * The HTML is parsed into a detached <div>. The values of all text nodes that
	 * are not whitespace-only are concatenated in document order and searched for
	 * the phrase (case-sensitive, literal match). Whitespace-only text nodes are
	 * ignored for matching but are left in the output. Every text node that
	 * overlaps the match gets its overlapping part wrapped in its own <mark>, so a
	 * phrase may span inline or block elements.
	 *
	 * NOTE: `source` is assigned to innerHTML, so pass only trusted or sanitized markup.
	 *
	 * @param {string} phrase - Text to highlight.
	 * @param {string} source - HTML to search.
	 * @param {string} [classname] - Class applied to every <mark>; defaults to "".
	 * @returns {string|boolean} The re-serialized HTML with <mark> elements added,
	 *   or false when the phrase does not occur.
	 */
	function highlightWordsInHTML(phrase, source, classname){
		var DOM = document.createElement("div");
		DOM.innerHTML = source;

		// Non-blank text nodes in document order, each with its offset in `combined`.
		var textNodes = [];
		var combined = "";

		// Depth-first walk over every descendant of DOM.
		var node = DOM.firstChild;
		while (node) {
			if (node.nodeType === 3 && node.nodeValue.trim() !== "") {
				textNodes.push({ node: node, start: combined.length });
				combined += node.nodeValue;
			}

			if (node.firstChild) {
				node = node.firstChild;
			} else {
				// Climb until there is a following sibling or we are back at the root.
				while (node !== DOM && !node.nextSibling) {
					node = node.parentNode;
				}
				node = (node === DOM) ? null : node.nextSibling;
			}
		}

		// Plain string search: no regular expression is built from the phrase, so
		// characters such as "+", "(" or "?" are matched literally.
		var matchStart = combined.indexOf(phrase);
		if (matchStart === -1) {
			console.log('phrase could not be found');
			return false;
		}
		var matchEnd = matchStart + phrase.length;

		// Replace each overlapping text node with: [text before] <mark> [text after].
		textNodes.forEach(function(entry){
			var text = entry.node.nodeValue;
			var from = Math.max(matchStart, entry.start) - entry.start;
			var to = Math.min(matchEnd, entry.start + text.length) - entry.start;

			if (from >= to) {
				return; // This node lies entirely outside the matched range
			}

			var fragment = document.createDocumentFragment();
			var mark = document.createElement("mark");
			mark.className = classname || "";
			mark.appendChild(document.createTextNode(text.substring(from, to)));

			if (from > 0) {
				fragment.appendChild(document.createTextNode(text.substring(0, from)));
			}
			fragment.appendChild(mark);
			if (to < text.length) {
				fragment.appendChild(document.createTextNode(text.substring(to)));
			}

			entry.node.parentNode.replaceChild(fragment, entry.node);
		});

		return DOM.innerHTML;

	}