brianmcallister/highlight.coffee

## highlight.coffee
# Private: Wrap every occurrence of the given words inside `text` with
# highlight markup, keeping any HTML already present in `text`.
#
# text  - String (may contain HTML) to format.
# words - Array of words to highlight (default: []).
#
# Returns `text` untouched when `words` is empty, otherwise the HTML string
# with the matches highlighted.
highlightWords = (text, words = []) ->
  if words.length isnt 0
    # Parse the input into a detached element so the text nodes can be
    # visited separately from the tags that surround them.
    container = document.createElement 'div'
    container.innerHTML = text

    # Walk the parsed tree and rewrite its text nodes in place.
    highlightNodeContent container, words

    # The highlight tags are written into text nodes as plain text, so the
    # serialised HTML holds them entity-encoded; decoding them here turns
    # them back into real markup.
    # NOTE(review): `_.unescape` runs over the whole serialised string, so
    # entities inside attribute values get decoded as well - confirm that is
    # acceptable for the inputs this receives.
    return _.unescape container.innerHTML

  text

# Private: Recursively highlight all occurrences of `words` inside a `node`.
#
# Matches are located in the raw (unescaped) text of every text node. Each
# piece of text is then HTML-escaped on its own and the matched pieces are
# wrapped in `<mark>` tags. Escaping after matching keeps words such as
# "amp" or "lt" from matching inside generated entities, and lets words that
# contain `&`, `<` or `>` match the text they appear in.
#
# node  - Node in which to highlight text content.
# words - List of words to highlight.
#
# Returns undefined.
highlightNodeContent = (node, words) ->
  # Recurse into child nodes first (a no-op for nodes without children).
  for childNode in node.childNodes
    highlightNodeContent childNode, words

  # Don't do anything if this isn't of type TEXT_NODE.
  return undefined if node.nodeType isnt 3

  raw = node.textContent

  # Don't do anything if there's no content.
  return undefined if raw.trim() is ''

  # Boundaries come back sorted and non-overlapping, so the text can be
  # rebuilt in a single left-to-right pass.
  boundaries = getHighlightWordBoundaries words, raw
  pieces = []
  cursor = 0

  for [start, stop] in boundaries
    pieces.push _.escape(raw.slice(cursor, start))
    # Same tags as the ones used by `addHighlightWordsMarkup`.
    pieces.push "<mark>#{_.escape raw.slice(start, stop)}</mark>"
    cursor = stop

  pieces.push _.escape(raw.slice(cursor))

  # The markup is stored as plain text here; the caller is expected to
  # decode the serialised HTML afterwards to turn it into real tags.
  node.textContent = pieces.join ''
  return undefined

# Private: Highlight words in a plain string.
#
# words - Array of words to highlight in the text.
# text  - String of text in which to highlight words.
#
# Returns the text with every matched range wrapped in highlight markup.
highlightWordsInText = (words, text) ->
  # Locate the ranges first, then let the markup helper wrap them.
  addHighlightWordsMarkup text, getHighlightWordBoundaries(words, text)

# Private: Given words and text, build an array of word boundaries within
# the text. Matching is case-insensitive. Overlapping or touching matches are
# merged into a single boundary. See the examples.
#
# words      - Array of words to highlight in the text. Empty or missing
#              entries are ignored.
# textString - String of text in which to highlight words.
#
# Examples
#
#   getHighlightWordBoundaries ['test'], 'aaatestaaa'
#   #=> [[3, 7]]
#
#   getHighlightWordBoundaries ['z'], 'aaazaaazaaa'
#   #=> [[3, 4], [7, 8]]
#
#   getHighlightWordBoundaries ['do', 'dollar'], 'aaa do dollar aaa'
#   #=> [[4, 6], [7, 13]]
#
# As you can see in the above examples, 'overlapping' words are collapsed.
# The string 'do' is a substring of 'dollar', so without merging this
# function would return *3* boundaries: 'do' (after 'aaa'), 'do' (inside
# 'dollar'), and 'dollar'. The last two are merged into [7, 13].
#
# Returns an array of [start, end] index pairs (end is exclusive), sorted by
# start and non-overlapping.
getHighlightWordBoundaries = (words, textString) ->
  boundaries = []

  # Matches every character that has a special meaning inside a RegExp, so
  # that each word is searched for literally.
  metaCharacterRegExp = /[.*+?^${}()|[\]\\]/g

  for word in words
    # An empty pattern matches at every position without ever advancing
    # `lastIndex`, which would make the loop below spin forever.
    continue unless word

    escaped = word.replace metaCharacterRegExp, '\\$&'

    # Search the original string with the `i` flag instead of lower-casing
    # it: lower-casing can change a string's length, which would shift every
    # index that follows.
    regex = new RegExp escaped, 'gi'

    # Find all matches in the string without resetting the lastIndex.
    while result = regex.exec textString
      boundaries.push [result.index, result.index + result[0].length]

  # Return now if there's one or less boundary.
  return boundaries if boundaries.length <= 1

  # Sort by start index so ranges that overlap or touch end up next to each
  # other.
  boundaries.sort (a, b) -> a[0] - b[0]

  # Fold the sorted ranges into non-overlapping ones.
  merged = []
  for [start, stop] in boundaries
    last = merged[merged.length - 1]
    if last? and start <= last[1]
      # Overlaps (or touches) the previous range: extend it if needed.
      last[1] = Math.max last[1], stop
    else
      merged.push [start, stop]

  return merged

# Private: Add highlight markup in some text based on word boundaries.
#
# text       - Text to highlight.
# boundaries - Array of [start, end] index pairs (relative to the original
#              `text`) indicating where to highlight.
#
# Returns the text with each range wrapped in `<mark>` tags.
addHighlightWordsMarkup = (text, boundaries) ->
  openTag = '<mark>'
  closeTag = '</mark>'
  tagWidth = openTag.length + closeTag.length

  # Wrap a single range. Every range handled earlier has already grown the
  # string by one pair of tags, so the indices are shifted accordingly.
  wrapRange = (marked, [lo, hi], position) ->
    shift = tagWidth * position
    head = marked.slice 0, lo + shift
    body = marked.slice lo + shift, hi + shift
    tail = marked.slice hi + shift
    head + openTag + body + closeTag + tail

  boundaries.reduce wrapRange, text
	# Private: Wrap every occurrence of the given words inside `text` with
	# highlight markup, keeping any HTML already present in `text`.
	#
	# text  - String (may contain HTML) to format.
	# words - Array of words to highlight (default: []).
	#
	# Returns `text` untouched when `words` is empty, otherwise the HTML string
	# with the matches highlighted.
	highlightWords = (text, words = []) ->
		if words.length isnt 0
			# Parse the input into a detached element so the text nodes can be
			# visited separately from the tags that surround them.
			container = document.createElement 'div'
			container.innerHTML = text

			# Walk the parsed tree and rewrite its text nodes in place.
			highlightNodeContent container, words

			# The highlight tags are written into text nodes as plain text, so
			# the serialised HTML holds them entity-encoded; decoding them here
			# turns them back into real markup.
			# NOTE(review): `_.unescape` runs over the whole serialised string,
			# so entities inside attribute values get decoded as well - confirm
			# that is acceptable for the inputs this receives.
			return _.unescape container.innerHTML

		text

	# Private: Recursively highlight all occurrences of `words` inside a `node`.
	#
	# Matches are located in the raw (unescaped) text of every text node. Each
	# piece of text is then HTML-escaped on its own and the matched pieces are
	# wrapped in `<mark>` tags. Escaping after matching keeps words such as
	# "amp" or "lt" from matching inside generated entities, and lets words that
	# contain `&`, `<` or `>` match the text they appear in.
	#
	# node  - Node in which to highlight text content.
	# words - List of words to highlight.
	#
	# Returns undefined.
	highlightNodeContent = (node, words) ->
		# Recurse into child nodes first (a no-op for nodes without children).
		for childNode in node.childNodes
			highlightNodeContent childNode, words

		# Don't do anything if this isn't of type TEXT_NODE.
		return undefined if node.nodeType isnt 3

		raw = node.textContent

		# Don't do anything if there's no content.
		return undefined if raw.trim() is ''

		# Boundaries come back sorted and non-overlapping, so the text can be
		# rebuilt in a single left-to-right pass.
		boundaries = getHighlightWordBoundaries words, raw
		pieces = []
		cursor = 0

		for [start, stop] in boundaries
			pieces.push _.escape(raw.slice(cursor, start))
			# Same tags as the ones used by `addHighlightWordsMarkup`.
			pieces.push "<mark>#{_.escape raw.slice(start, stop)}</mark>"
			cursor = stop

		pieces.push _.escape(raw.slice(cursor))

		# The markup is stored as plain text here; the caller is expected to
		# decode the serialised HTML afterwards to turn it into real tags.
		node.textContent = pieces.join ''
		return undefined

	# Private: Highlight words in a plain string.
	#
	# words - Array of words to highlight in the text.
	# text  - String of text in which to highlight words.
	#
	# Returns the text with every matched range wrapped in highlight markup.
	highlightWordsInText = (words, text) ->
		# Locate the ranges first, then let the markup helper wrap them.
		addHighlightWordsMarkup text, getHighlightWordBoundaries(words, text)

	# Private: Given words and text, build an array of word boundaries within
	# the text. Matching is case-insensitive. Overlapping or touching matches
	# are merged into a single boundary. See the examples.
	#
	# words      - Array of words to highlight in the text. Empty or missing
	#              entries are ignored.
	# textString - String of text in which to highlight words.
	#
	# Examples
	#
	#   getHighlightWordBoundaries ['test'], 'aaatestaaa'
	#   #=> [[3, 7]]
	#
	#   getHighlightWordBoundaries ['z'], 'aaazaaazaaa'
	#   #=> [[3, 4], [7, 8]]
	#
	#   getHighlightWordBoundaries ['do', 'dollar'], 'aaa do dollar aaa'
	#   #=> [[4, 6], [7, 13]]
	#
	# As you can see in the above examples, 'overlapping' words are collapsed.
	# The string 'do' is a substring of 'dollar', so without merging this
	# function would return 3 boundaries: 'do' (after 'aaa'), 'do' (inside
	# 'dollar'), and 'dollar'. The last two are merged into [7, 13].
	#
	# Returns an array of [start, end] index pairs (end is exclusive), sorted by
	# start and non-overlapping.
	getHighlightWordBoundaries = (words, textString) ->
		boundaries = []

		# Matches every character that has a special meaning inside a RegExp,
		# so that each word is searched for literally.
		metaCharacterRegExp = /[.*+?^${}()|[\]\\]/g

		for word in words
			# An empty pattern matches at every position without ever advancing
			# `lastIndex`, which would make the loop below spin forever.
			continue unless word

			escaped = word.replace metaCharacterRegExp, '\\$&'

			# Search the original string with the `i` flag instead of
			# lower-casing it: lower-casing can change a string's length, which
			# would shift every index that follows.
			regex = new RegExp escaped, 'gi'

			# Find all matches in the string without resetting the lastIndex.
			while result = regex.exec textString
				boundaries.push [result.index, result.index + result[0].length]

		# Return now if there's one or less boundary.
		return boundaries if boundaries.length <= 1

		# Sort by start index so ranges that overlap or touch end up next to
		# each other.
		boundaries.sort (a, b) -> a[0] - b[0]

		# Fold the sorted ranges into non-overlapping ones.
		merged = []
		for [start, stop] in boundaries
			last = merged[merged.length - 1]
			if last? and start <= last[1]
				# Overlaps (or touches) the previous range: extend it if needed.
				last[1] = Math.max last[1], stop
			else
				merged.push [start, stop]

		return merged

	# Private: Add highlight markup in some text based on word boundaries.
	#
	# text       - Text to highlight.
	# boundaries - Array of [start, end] index pairs (relative to the original
	#              `text`) indicating where to highlight.
	#
	# Returns the text with each range wrapped in `<mark>` tags.
	addHighlightWordsMarkup = (text, boundaries) ->
		openTag = '<mark>'
		closeTag = '</mark>'
		tagWidth = openTag.length + closeTag.length

		# Wrap a single range. Every range handled earlier has already grown the
		# string by one pair of tags, so the indices are shifted accordingly.
		wrapRange = (marked, [lo, hi], position) ->
			shift = tagWidth * position
			head = marked.slice 0, lo + shift
			body = marked.slice lo + shift, hi + shift
			tail = marked.slice hi + shift
			head + openTag + body + closeTag + tail

		boundaries.reduce wrapRange, text