Skip to content

Instantly share code, notes, and snippets.

@MadeByMike
Last active February 15, 2018 05:24
Show Gist options
  • Star 8 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save MadeByMike/1c4ca3ecbae07ef46b9e to your computer and use it in GitHub Desktop.
Save MadeByMike/1c4ca3ecbae07ef46b9e to your computer and use it in GitHub Desktop.
Highlighting a sentence in HTML without destroying the markup around it is exceptionally hard!!!
/*!
* Highlight words in html - v0.0.1 - 2015-04-29
* http://madebymike.com.au
* Copyright (c) 2015 Mike Riethmuller; Licensed MIT
*/
/*
* It turns out that annotating a phrase within HTML is exceptionally hard. I've done it so you don't have to.
* The function accepts a phrase (string), HTML (string), and optionally a classname (string)
* It returns the same HTML with <mark> elements surrounding the phrase
* Phrases can be separated by inline elements such as <strong> or <em> or can be split across block elements
* Ignores line breaks, images and empty elements. All that matters is that the words are in order.
*/
/**
 * Wraps the first occurrence of a phrase in <mark> elements without
 * disturbing the markup around it.
 *
 * The phrase is searched for (case-sensitively) in the text of every
 * non-blank text node, joined in document order. It may therefore start and
 * end anywhere inside a text node and may span any number of inline or block
 * elements. Whitespace-only text nodes are ignored while matching and are
 * never marked.
 *
 * @param {string} phrase - Text to highlight. An empty phrase marks nothing.
 * @param {string} source - HTML to search. NOTE(review): it is parsed by
 *   assigning to innerHTML, so callers should pass trusted or sanitised markup.
 * @param {string} [classname] - Class name set on every <mark> element.
 * @returns {string|boolean} The HTML with the phrase marked, or false when
 *   the phrase does not occur in the text.
 */
function highlightWordsInHTML(phrase, source, classname){
  var DOM = document.createElement("div");
  DOM.innerHTML = source;

  // Nothing to highlight: hand back the parsed HTML untouched.
  if (phrase === "") {
    return DOM.innerHTML;
  }

  // Gather, in document order, every text node holding more than whitespace.
  var textNodes = [];
  (function collect(parent){
    for (var child = parent.firstChild; child; child = child.nextSibling) {
      if (child.nodeType === 3) {
        if (child.nodeValue.trim() !== "") {
          textNodes.push(child);
        }
      } else {
        collect(child);
      }
    }
  })(DOM);

  // Join the node texts, remembering where each node starts in the joined
  // string, so a single plain-string search can locate a phrase that is
  // split across several nodes. No regular expression is involved, so
  // characters such as "+", "?" or "(" in the phrase need no escaping.
  var offsets = [];
  var text = "";
  textNodes.forEach(function(node){
    offsets.push(text.length);
    text += node.nodeValue;
  });

  var start = text.indexOf(phrase);
  if (start === -1) {
    console.log('phrase could not be found');
    return false;
  }
  var end = start + phrase.length;

  // Wrap the slice of each text node that overlaps [start, end).
  // The node list was captured above, so replacing nodes here is safe.
  textNodes.forEach(function(node, index){
    var value = node.nodeValue;
    var from = Math.max(start - offsets[index], 0);
    var to = Math.min(end - offsets[index], value.length);
    if (from >= to) {
      return; // This node lies entirely outside the matched range
    }

    var fragment = document.createDocumentFragment();
    if (from > 0) {
      // Text preceding the phrase stays unmarked
      fragment.appendChild(document.createTextNode(value.substring(0, from)));
    }

    var mark = document.createElement("mark");
    mark.className = classname || "";
    mark.appendChild(document.createTextNode(value.substring(from, to)));
    fragment.appendChild(mark);

    if (to < value.length) {
      // Text following the phrase stays unmarked
      fragment.appendChild(document.createTextNode(value.substring(to)));
    }

    // Replace the text node with the fragment we created
    node.parentNode.replaceChild(fragment, node);
  });

  return DOM.innerHTML;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment