Last active
October 23, 2019 07:56
-
-
Save petamoriken/70d64b963f312684017347c329432d53 to your computer and use it in GitHub Desktop.
HTML 内の文字列の検索
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Upper-case tag names of the HTML void elements.
 * The entries are compared against `node.tagName` by the search code below,
 * which is why they are stored in upper case.
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 * @type {ReadonlyArray<string>}
 */
const VOID_ELEMENTS = Object.freeze([
  "AREA",
  "BASE",
  "BR",
  "COL",
  "EMBED",
  "HR",
  "IMG",
  "INPUT",
  "LINK",
  "META",
  "PARAM",
  "SOURCE",
  "TRACK",
  "WBR",
]);
/**
 * Upper-case tag names of the elements listed under "text-level semantics"
 * in the HTML specification. A match is allowed to continue across the
 * boundary of these elements, and a Range covering the whole text of such an
 * element may be widened to the element itself.
 *
 * @see https://html.spec.whatwg.org/multipage/text-level-semantics.html
 * @type {ReadonlyArray<string>}
 */
const TEXT_LEVEL_ELEMENTS = Object.freeze([
  "A",
  "EM",
  "STRONG",
  "SMALL",
  "S",
  "CITE",
  "Q",
  "DFN",
  "ABBR",
  "RUBY",
  "RT",
  "RP",
  "DATA",
  "TIME",
  "CODE",
  "VAR",
  "SAMP",
  "KBD",
  "SUB",
  "SUP",
  "I",
  "B",
  "U",
  "MARK",
  "BDI",
  "BDO",
  "SPAN",
  "BR",
  "WBR",
]);
/**
 * Searches `target` for `text` and yields one Range per occurrence.
 * Hidden elements are not taken into account: matching is done purely on
 * `target.textContent`.
 *
 * When a match covers exactly one whole Text node, the Range is widened to the
 * outermost enclosing text-level element that contains nothing but that text
 * (e.g. the `<b>` in `<b>foo</b>`), so the element itself is selected.
 *
 * @param {DocumentFragment | Element} target - subtree to search in
 * @param {string} text - string to look for; an empty string yields nothing
 * @throws {Error} - propagated from getNodeRange when target contains a
 *   Comment or ProcessingInstruction
 * @returns {Generator<Range>}
 */
function* searchText(target, text) {
  const textLength = text.length;
  if (textLength === 0) {
    return;
  }

  const offsets = searchTextOffsets(target.textContent, text);
  for (const { start, end } of getNodeRange(target, offsets, textLength)) {
    const [startText, startOffset] = start;
    const [endText, endOffset] = end;
    const range = new Range();

    // Does the match cover one Text node from its first to its last character?
    const coversWholeText =
      startText === endText &&
      startOffset === 0 &&
      endOffset === startText.nodeValue.length;

    if (coversWholeText) {
      /** @type {Node} */
      let selected = startText;
      // While the parent is a text-level element whose only child is the
      // current node, widen the selection to that parent.
      // An element without a parent is never selected: Range.selectNode()
      // throws for a parentless node (e.g. a detached search root).
      let parent;
      while (
        (parent = selected.parentNode) !== null &&
        parent.nodeType === Node.ELEMENT_NODE &&
        parent.parentNode !== null &&
        parent.childNodes.length === 1 &&
        TEXT_LEVEL_ELEMENTS.includes(parent.tagName)
      ) {
        selected = parent;
      }
      range.selectNode(selected);
    } else {
      range.setStart(startText, startOffset);
      range.setEnd(endText, endOffset);
    }

    yield range;
  }
}
/**
 * Returns the start index of every occurrence of `text` in `targetContent`,
 * in ascending order. Overlapping occurrences are all reported
 * (e.g. "aa" in "aaa" gives [0, 1]).
 *
 * @param {string} targetContent - string to search in
 * @param {string} text - string to look for
 * @return {number[]} start indices; empty when `text` is empty or not found
 */
function searchTextOffsets(targetContent, text) {
  const offsets = [];
  // indexOf("", i) clamps to the string length and never returns -1,
  // so an empty needle would make the loop below run forever.
  if (text.length === 0) {
    return offsets;
  }

  for (
    let offset = targetContent.indexOf(text);
    offset !== -1;
    offset = targetContent.indexOf(text, offset + 1)
  ) {
    offsets.push(offset);
  }
  return offsets;
}
/**
 * Converts start offsets within `target.textContent` into start/end positions
 * expressed as (Text node, offset inside that node) pairs.
 *
 * A match may span several Text nodes as long as only text-level, non-void
 * elements start in between. A match that is still open when any other element
 * (or a void element such as `<br>`) starts is dropped.
 *
 * Overlapping matches are supported: every match that has started but not yet
 * ended is kept in a queue until the Text node containing its end is reached.
 * `textOffsets` is only read, never modified.
 *
 * @param {DocumentFragment | Element} target - subtree the offsets refer to
 * @param {number[]} textOffsets - match start offsets in ascending order
 *   (as produced by searchTextOffsets)
 * @param {number} textLength - length of the searched string
 * @throws {Error} - when a Comment or ProcessingInstruction is encountered
 *   before all offsets have been processed
 * @returns {Generator<{ start: [Text, number], end: [Text, number] }>}
 */
function* getNodeRange(target, textOffsets, textLength) {
  // Index of the next offset whose start node has not been found yet.
  let nextIndex = 0;
  // textContent offset at which the current Text node begins.
  let contentOffset = 0;
  /**
   * Matches that started in an earlier Text node and have not ended yet,
   * ordered by ascending offset.
   * @type {{ offset: number, start: [Text, number] }[]}
   */
  const pending = [];

  for (const node of createNodeIterator(target, {
    whatToShow:
      NodeFilter.SHOW_TEXT |
      NodeFilter.SHOW_ELEMENT |
      NodeFilter.SHOW_COMMENT |
      NodeFilter.SHOW_PROCESSING_INSTRUCTION,
  })) {
    // Every offset has been resolved or dropped: nothing left to do.
    if (pending.length === 0 && nextIndex >= textOffsets.length) {
      return;
    }

    switch (node.nodeType) {
      case Node.TEXT_NODE: {
        const contentEnd = contentOffset + node.nodeValue.length;

        // ┣━━┝━━┫──┤
        // Open matches that end inside this node (or exactly at its end).
        while (
          pending.length > 0 &&
          pending[0].offset + textLength <= contentEnd
        ) {
          const { offset, start } = pending.shift();
          yield {
            start,
            end: [node, offset + textLength - contentOffset],
          };
        }

        // Matches that start inside this node.
        while (
          nextIndex < textOffsets.length &&
          textOffsets[nextIndex] < contentEnd
        ) {
          const offset = textOffsets[nextIndex];
          nextIndex += 1;
          const startInNode = offset - contentOffset;
          if (offset + textLength <= contentEnd) {
            // ├──┣━━┫──┤
            // The node contains the whole match.
            yield {
              start: [node, startInNode],
              end: [node, startInNode + textLength],
            };
          } else {
            // ├──┣━━┥━━┫
            // The match runs past the end of this node.
            pending.push({ offset, start: [node, startInNode] });
          }
        }

        contentOffset = contentEnd;
        break;
      }
      case Node.ELEMENT_NODE:
        // Open matches may only continue across text-level elements, and not
        // across void ones even if they are text-level. Otherwise drop them.
        if (
          pending.length > 0 &&
          (!TEXT_LEVEL_ELEMENTS.includes(node.tagName) ||
            VOID_ELEMENTS.includes(node.tagName))
        ) {
          pending.length = 0;
        }
        break;
      case Node.COMMENT_NODE:
      case Node.PROCESSING_INSTRUCTION_NODE:
        throw new Error(
          `target includes unsupported Node name: ${node.nodeName}, value: ${node.nodeValue}`,
        );
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
`createNodeIterator` is defined in this gist: https://gist.github.com/petamoriken/99e2167f57365d437e5b24f24da6feab