Skip to content

Instantly share code, notes, and snippets.

@brianmcallister
Last active August 29, 2015 14:16
Show Gist options
  • Save brianmcallister/9ec1780838ac67fb8598 to your computer and use it in GitHub Desktop.
Save brianmcallister/9ec1780838ac67fb8598 to your computer and use it in GitHub Desktop.
Highlight words in text. Still need to remove the underscore dependency.
# Private: Highlight individual words within the message text.
#
# text  - Text to format. It is parsed as HTML, so it may contain markup;
#         only the text content between tags is searched.
# words - Array of words to highlight (default: []).
#
# Returns the formatted text as an HTML string in which every occurrence of
# a word is wrapped in a `<mark>` element. When `words` is empty the original
# `text` is returned untouched.
highlightWords = (text, words = []) ->
  return text if words.length is 0

  # Parse the text into a detached element so existing HTML inside `text` is
  # detected and only real text nodes are searched.
  container = document.createElement 'div'
  container.innerHTML = text

  # Recursively wrap every match inside the container in a `<mark>` element.
  highlightNodeContent container, words

  # The highlights are real elements, so the browser's own serialization is
  # already correct HTML. No escape/unescape round trip is needed, which
  # keeps entities in text and in attribute values intact.
  return container.innerHTML

# Private: Recursively highlight all occurrences of `words` inside a `node`.
#
# Matching text nodes are replaced in their parent by a sequence of plain
# text nodes and `<mark>` elements. A text node that has no parent is left
# unchanged, because there is nowhere to insert the `<mark>` elements.
#
# node  - Node in which to highlight text content.
# words - List of words to highlight.
#
# Returns undefined.
highlightNodeContent = (node, words) ->
  # Recurse into a snapshot of the children: `childNodes` is a live list and
  # replacing a text node below changes it. The snapshot also keeps us from
  # descending into the `<mark>` elements we have just inserted.
  for childNode in Array::slice.call node.childNodes
    highlightNodeContent childNode, words

  # Don't do anything if this isn't of type TEXT_NODE.
  return undefined if node.nodeType isnt 3

  # Search the raw text, not an HTML-escaped copy, so that entity names such
  # as `amp` or `lt` can never be matched by accident.
  content = node.textContent

  # Don't do anything if there's no content.
  return undefined if content.trim() is ''

  boundaries = getHighlightWordBoundaries words, content
  return undefined if boundaries.length is 0

  parent = node.parentNode
  return undefined unless parent?

  # Rebuild the text as alternating plain text and `<mark>` elements. The
  # boundaries are sorted, non-overlapping [start, end) index pairs.
  doc = node.ownerDocument
  fragment = doc.createDocumentFragment()
  cursor = 0

  for [start, end] in boundaries
    if start > cursor
      fragment.appendChild doc.createTextNode content.slice(cursor, start)

    mark = doc.createElement 'mark'
    mark.appendChild doc.createTextNode content.slice(start, end)
    fragment.appendChild mark
    cursor = end

  # Keep whatever text follows the last highlighted word.
  if cursor < content.length
    fragment.appendChild doc.createTextNode content.slice(cursor)

  parent.replaceChild fragment, node
  return undefined
# Private: Wrap every occurrence of the given words in highlight markup.
#
# words - Array of words to highlight in the text.
# text  - String of text in which to highlight words.
#
# Returns the text with the highlight markup inserted around each word.
highlightWordsInText = (words, text) ->
  # Locate the words first, then splice the markup in at those positions.
  addHighlightWordsMarkup text, getHighlightWordBoundaries(words, text)
# Private: Given words and text, build an array of word boundaries within
# the text. Matching is case-insensitive and literal (RegExp metacharacters
# in a word have no special meaning). Overlapping and touching matches are
# merged into a single boundary. See the examples.
#
# words      - Array of words to highlight in the text. Empty words are
#              ignored.
# textString - String of text in which to highlight words.
#
# Examples
#
#   getHighlightWordBoundaries ['test'], 'aaatestaaa'
#   #=> [[3, 7]]
#
#   getHighlightWordBoundaries ['z'], 'aaazaaazaaa'
#   #=> [[3, 4], [7, 8]]
#
#   getHighlightWordBoundaries ['do', 'dollar'], 'aaa do dollar aaa'
#   #=> [[4, 6], [7, 13]]
#
# As you can see in the above examples, 'overlapping' words should be handled
# correctly. If they weren't, you would get too many boundaries back, making
# highlighting the text very difficult.
#
# For example, the string 'do' is a substring of 'dollar'. If the overlapping
# boundaries weren't handled, this function would return an array with *3*
# boundaries, marking 'do' (after 'aaa'), 'do' (inside 'dollar'), and 'dollar'.
#
# Returns an array of [start, end] word boundaries (end exclusive), sorted by
# start index.
getHighlightWordBoundaries = (words, textString) ->
  boundaries = []
  textString = textString.toLowerCase()

  # Matches every RegExp metacharacter, so each word can be escaped and then
  # searched for literally.
  metaCharacterRegExp = /[\\^$.|?*+()\[\]{}]/g

  for word in words
    # An empty pattern matches at every position without ever advancing
    # `lastIndex`, which would make the `exec` loop below spin forever.
    continue unless word

    word = word.toLowerCase()

    # Escape RegExp metacharacters.
    escaped = word.replace metaCharacterRegExp, '\\$&'
    regex = new RegExp escaped, 'g'

    # With the `g` flag every `exec` call resumes from `lastIndex`, so this
    # loop visits each occurrence of the word in turn.
    while result = regex.exec textString
      boundaries.push [result.index, result.index + result[0].length]

  # Return now if there's one or less boundary.
  return boundaries if boundaries.length <= 1

  # Sort the boundaries by their start index. After sorting, a boundary can
  # only overlap or touch the most recently merged one.
  boundaries.sort (a, b) -> a[0] - b[0]

  merged = [boundaries[0]]

  for range in boundaries[1..]
    last = merged[merged.length - 1]

    if range[0] <= last[1]
      # Overlapping or touching: extend the previous boundary.
      last[1] = Math.max last[1], range[1]
    else
      merged.push range

  return merged
# Private: Wrap the given ranges of a string in `<mark>` tags.
#
# text       - Text to highlight.
# boundaries - Array of [start, end] index pairs (relative to the original,
#              unmarked text) indicating where to highlight.
#
# Returns the text with the appropriate markup added.
addHighlightWordsMarkup = (text, boundaries) ->
  [openTag, closeTag] = ['<mark>', '</mark>']
  tagWidth = openTag.length + closeTag.length

  for range, position in boundaries
    # Each earlier range already added one tag pair to the string, so shift
    # this range's indices right by that many characters.
    shift = tagWidth * position
    lo = range[0] + shift
    hi = range[1] + shift

    pieces = [
      text.slice 0, lo
      openTag
      text.slice lo, hi
      closeTag
      text.slice hi
    ]
    text = pieces.join ''

  return text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment