Last active
May 14, 2021 15:41
-
-
Save dbazile/1043191628bbe9bc7c8cff97886db37a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ABSTRACT | |
An attempt to programmatically apply highlights to a complex DOM | |
element using a set of character ranges. Ultimately, this | |
implementation does not work properly as the highlighted nodes go | |
off track when there are multiple ranges in the same text node. I | |
got tired of looking at it (and will probably go with rangy or | |
something) but something in here may be useful someday. | |
Updated 2021-02-25: I think I fixed it. The problem was that I
was advancing the node -AND- the span in the final case, when the
span was consumed. Moved some stuff around and I think it works now.
DIAGRAM AND SEQUENCE | |
        0     1     2     3     4     5     6     7     8     9     10
        |     |     |     |     |     |     |     |     |     |     |
nodes   |#################|###########|###########|#####|#####|#####|
        |       (A)       |    (B)    |    (C)    | (D) | (E) | (F) |
        |     |     |     |     |     |     |     |     |     |     |
        |     |     |     |     |     |     |     |     |     |     |
spans   |     |     |###########|     |#####|#################|#####|
        |     |     |    (W)    |     | (X) |       (Y)       | (Z) |
        |     |     |     |     |     |     |     |     |     |     |
        0     1     2     3     4     5     6     7     8     9     10
Sequence: | |
(1) pos = 0 | |
node(A).size = 3 | |
span(W).start = 2 - pos = 2 | |
span(W).stop = 4 - pos = 4 | |
* mark 2-3, throw away a node | |
* [span ends out of bounds] advance node | |
(2) pos = 3 | |
node(B).size = 2 | |
span(W).start = 2 - pos = -1 // 0 | |
span(W).stop = 4 - pos = 1 | |
* mark 0-1, throw away a node | |
* [span ends] advance span | |
(3) pos = 3 | |
node(B).size = 2 | |
span(X).start = 5 - pos = 2 | |
span(X).stop = 6 - pos = 3 | |
* [span starts out of bounds] advance node | |
(4) pos = 5 | |
node(C).size = 2 | |
span(X).start = 5 - pos = 0 | |
span(X).stop = 6 - pos = 1 | |
* mark 0-1, throw away a node | |
* [span ends] advance span | |
(5) pos = 5 | |
node(C).size = 2 | |
span(Y).start = 6 - pos = 1 | |
span(Y).stop = 9 - pos = 4 | |
* mark 1-2 | |
* [span ends out of bounds] advance node | |
(6) pos = 7 | |
node(D).size = 1 | |
span(Y).start = 6 - pos = -1 // 0 | |
span(Y).stop = 9 - pos = 2 | |
* mark 0-1 | |
* [span ends out of bounds] advance node | |
(7) pos = 8 | |
node(E).size = 1 | |
span(Y).start = 6 - pos = -2 // 0 | |
span(Y).stop = 9 - pos = 1 | |
* mark 0-1 | |
* [span ends] advance span | |
(8) pos = 8 | |
node(E).size = 1 | |
span(Z).start = 9 - pos = 1 | |
span(Z).stop = 10 - pos = 2 | |
* [span starts out of bounds] advance node
(9) pos = 9 | |
node(F).size = 1 | |
span(Z).start = 9 - pos = 0 | |
span(Z).stop = 10 - pos = 1 | |
* mark 0-1 | |
* [span ends] advance span | |
(10) pos = 9 | |
span = null | |
* [spans exhausted] end sequence | |
CONSIDERATIONS | |
- Both HTML tags and HTML entities need to be factored in when | |
generating the character ranges. | |
- Cases "ranges overlap" and "ranges out of order" are -NOT- | |
handled by this implementation. | |
REFERENCE IMPLEMENTATION | |
Got the basic process down from here: | |
https://github.com/olivernn/moonwalkers/blob/master/src/main.js | |
https://olivernn.github.io/moonwalkers/ | |
The reference implementation falls short for this use case because it
handles neither multiple spans per text node nor spans that cross text
node boundaries.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Demo setup: builds the element to highlight, renders two previews of what
// the highlighting should produce, and publishes the shared state (`spans`,
// `i`, `walker`) on `window` for the code that follows this IIFE.
(() => {
  console.clear()
  console.info('%c%s', 'background-color:red', new Date())

  // Character ranges to highlight, as [start, stop) offsets into
  // `target.textContent` (stop is exclusive; see the `<` test below).
  // Note: there are line breaks in the spans
  window.spans = [[0,3],[4,20],[37,40],[41,46],[52,54],[56,58],[59,64],[65,72],[74,80],[82,87],[89,91],[92,94],[96,98],[102,108],[109,111],[117,119],[129,132],[133,137],[138,141],[150,154],[155,157],[158,162],[171,173],[174,178],[181,184],[186,191],[192,197],[200,202],[203,207],[214,221],[232,237],[241,246],[247,255],[256,258],[259,261],[267,269],[270,272],[275,279],[286,288],[291,293],[296,307],[310,312],[320,323],[324,326],[327,329],[331,333],[342,347],[349,351],[360,364],[372,378],[382,385],[387,389],[392,395],[398,402],[408,410],[413,415],[417,423],[427,433],[436,438],[440,444]]

  // Iteration counter, read and incremented by the highlighting loop
  window.i = 0

  // Element to be highlighted
  const target = document.createElement('div')
  target.style.font = '30px monospace'
  target.innerHTML = '\n<p><u>Lorem</u> ipsum <strong>dolor sit amet, consectetur</strong> adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.</p>\n<p>Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat.</p>\n<p>Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>000000000001'
  document.body.appendChild(target)

  // Read the flattened text once; both previews index into it.
  const fullText = target.textContent

  // Previews to show what should be happening
  const previews = document.createElement('div')
  previews.style.display = 'flex'
  previews.style.flexDirection = 'row'
  previews.style.marginTop = '50px'
  document.body.appendChild(previews)

  // What text the spans should contain
  const spanPreview = document.createElement('pre')
  spanPreview.textContent = window.spans
    .map(([start, stop]) => {
      const excerpt = fullText.substring(start, stop).replace(/\n/g, '\\n')
      return `${String(start).padStart(4)},${String(stop).padStart(4)} '${excerpt}'`
    })
    .join('\n')
  previews.appendChild(spanPreview)

  // What the highlighted text should look like
  const textPreview = document.createElement('div')
  textPreview.style.marginLeft = '50px'
  textPreview.style.display = 'flex'
  textPreview.style.flexDirection = 'row'
  textPreview.style.flexWrap = 'wrap'
  previews.appendChild(textPreview)

  // split('') (not spread) so that cell indexes are UTF-16 offsets, the same
  // unit `substring` and the span offsets use.
  const chars = fullText.split('')
  for (let idx = 0; idx < chars.length; idx++) {
    const glyph = chars[idx] === '\n' ? '\\n' : chars[idx]
    const cell = document.createElement('pre')
    cell.style.display = 'flex'
    cell.style.flexDirection = 'column'
    cell.style.padding = '5px'
    cell.style.textAlign = 'center'
    // Tint every character that falls inside one of the spans.
    if (window.spans.some(([start, stop]) => idx >= start && idx < stop)) {
      cell.style.backgroundColor = '#ff03'
    }
    cell.textContent = `${glyph}\n\n${idx}`
    textPreview.appendChild(cell)
  }

  // Walker over the text nodes of `target`, consumed by the highlighting loop
  window.walker = document.createTreeWalker(target, NodeFilter.SHOW_TEXT)
})();
// Cursor state shared by mark() and main().

// Text node currently under examination.
let node = walker.nextNode()

// [start, stop] range currently being applied; taken off the front of `spans`.
let span = spans.shift()

// Offset, within the overall text, at which `node`'s content begins.
let pos = 0

// Start and stop of `span` expressed relative to `node` (assigned by main()).
let begin
let end
/**
 * Wrap the characters [begin, end) of text node `n` in a <mark> element and
 * step the shared tree walker past the result.
 *
 * Reads the shared `walker`, `pos`, `i` and `span` (the latter three only for
 * logging and for the tooltip on the created element).
 *
 * @param {Text} n - text node to highlight part of
 * @param {number} begin - start offset within `n`
 * @param {number} end - stop offset within `n` (exclusive)
 * @returns {Node|null} whatever `walker.nextNode()` yields after the walker
 *   has been advanced; callers use it as the next node to examine
 */
function mark(n, begin, end) {
  const text = n.textContent
  const selected = text.substring(begin, end)

  // Whitespace-only selections are not highlighted; the walker still moves on.
  if (!selected.trim()) {
    console.debug('discard empty mark', { pos, begin, end }, n)
    return walker.nextNode()
  }

  // Log against the node that was passed in, not the global cursor, so the
  // function does not depend on `node` happening to be the same object.
  console.info(`%c%s`,
    'background-color:yellow',
    JSON.stringify(selected),
    { pos, begin, end, len: text.length, node: text })

  const tag = document.createElement('mark')
  tag.style.display = 'inline'
  tag.title = `[${i}] ${span[0]} - ${span[1]}`
  tag.style.borderLeft = '1px solid green'

  // Both boundaries sit in the same text node, so the range never partially
  // selects an element and surroundContents() can wrap it.
  const range = document.createRange()
  range.setStart(n, begin)
  range.setEnd(n, end)
  range.surroundContents(tag)

  // throw next node away (it'll be the text node inside of `tag`)
  walker.nextNode()
  return walker.nextNode()
}
/**
 * Drive the highlighting: walk the text nodes and the spans in lock-step,
 * calling mark() for every part of a span that falls inside the current node.
 *
 * Operates on the shared cursor state (`node`, `span`, `spans`, `pos`,
 * `begin`, `end`, `i`, `walker`) and adds a small debug dashboard to the top
 * of the page. Once `i` reaches CHECKPOINT each iteration waits for a click on
 * the ADVANCE button; the loop gives up after MAX_ITERATIONS iterations.
 *
 * @returns {Promise<void>} resolves when nodes or spans are exhausted, or the
 *   iteration cap is hit
 */
async function main() {
  const CHECKPOINT = 600
  // Safety cap so a tracking bug cannot spin forever.
  const MAX_ITERATIONS = 500

  const dashboard = document.createElement('div')
  dashboard.style.fontFamily = 'monospace'
  document.body.insertBefore(dashboard, document.body.firstChild)

  const btn = document.createElement('button')
  btn.textContent = 'ADVANCE'
  dashboard.appendChild(btn)

  const loc = document.createElement('span')
  loc.textContent = '#'
  dashboard.appendChild(loc)

  // Cursor state as logged by every exit path of the loop.
  const snapshot = () => ({
    pos,
    begin: span ? span[0] : null,
    end: span ? span[1] : null,
    len: node?.textContent.length,
  })

  while (true) {
    if (i > MAX_ITERATIONS) {
      console.warn(`break`, snapshot(), node)
      break
    }
    if (!node) {
      console.info(`nodes exhausted`, snapshot(), node)
      break
    }
    if (!span) {
      console.info(`spans exhausted`, snapshot(), node)
      break
    }

    // DEBUG: show the cursor and, past the checkpoint, single-step on click
    loc.textContent = `pos=${pos} span=${span} begin=${Math.max(0, span[0] - pos)} end=${span[1] - pos} node=${JSON.stringify(node.textContent)}`
    if (i >= CHECKPOINT) {
      await new Promise(resolve => btn.onclick = resolve)
    }

    // Express the span relative to the current node. `begin` is clamped to 0
    // for spans that started in an earlier node.
    const len = node.textContent.length
    begin = Math.max(0, span[0] - pos)
    end = span[1] - pos
    console.info('[%d]', ++i, { pos, span: span.join(', '), begin, end, len }, node)

    // span starts downstream
    if (begin >= len) {
      node = walker.nextNode()
      pos += len
      console.info('advanced node: span starts downstream', node)
      continue
    }

    // span ends downstream: mark the rest of this node, keep the span
    if (end >= len) {
      node = mark(node, begin, len)
      pos += len
      console.info('advanced node: span ends downstream', node)
      continue
    }

    // span ended upstream
    if (span[1] < pos) {
      span = spans.shift()
      console.info(`advanced span: span ended upstream`, span)
      continue
    }

    // span consumed
    if (end < len) {
      if (end > 0) {
        if (node.textContent.substring(begin, end).trim()) {
          // After the wrap, the returned node is the remainder of this text
          // node, which starts `end` characters further along.
          node = mark(node, begin, end)
          pos += end
        } else {
          // mark() would discard a whitespace-only selection and move the
          // walker to the NEXT text node, while only `end` characters would
          // be added to `pos`, leaving the two out of sync. Skip the span and
          // leave node/pos untouched instead.
          console.debug('skip empty mark', { pos, begin, end }, node)
        }
      }
      span = spans.shift()
      console.info(`advanced span: span consumed`, span)
      continue
    }

    // Only reachable if the comparisons above all failed (e.g. NaN offsets).
    console.warn('wat', { pos, begin: span[0], end: span[1], len }, node)
    debugger
  }

  btn.disabled = true
  btn.textContent = 'COMPLETE'
}
// Kick off the highlighting; report a failure instead of leaving the promise floating.
main().catch(err => console.error(err))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment