Skip to content

Instantly share code, notes, and snippets.

@petamoriken
Last active October 23, 2019 07:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save petamoriken/70d64b963f312684017347c329432d53 to your computer and use it in GitHub Desktop.
Save petamoriken/70d64b963f312684017347c329432d53 to your computer and use it in GitHub Desktop.
HTML 内の文字列の検索
/**
 * Upper-case tag names of the HTML void elements.
 * The list is frozen so it cannot be modified at runtime.
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 */
const VOID_ELEMENTS = Object.freeze([
  "AREA", "BASE", "BR", "COL", "EMBED", "HR", "IMG",
  "INPUT", "LINK", "META", "PARAM", "SOURCE", "TRACK", "WBR",
]);
/**
 * Upper-case tag names of the elements listed under "text-level semantics"
 * in the HTML Standard. The list is frozen so it cannot be modified at runtime.
 *
 * @see https://html.spec.whatwg.org/multipage/text-level-semantics.html
 */
const TEXT_LEVEL_ELEMENTS = Object.freeze([
  // Links, emphasis and annotations
  "A", "EM", "STRONG", "SMALL", "S", "CITE", "Q", "DFN", "ABBR",
  // Ruby annotations
  "RUBY", "RT", "RP",
  // Machine-readable data and computer text
  "DATA", "TIME", "CODE", "VAR", "SAMP", "KBD",
  // Typographic variants
  "SUB", "SUP", "I", "B", "U", "MARK",
  // Bidirectional text and generic wrapper
  "BDI", "BDO", "SPAN",
  // Line-break elements (also void elements)
  "BR", "WBR",
]);
/**
 * Searches `target` for every occurrence of `text` and yields a Range for each.
 *
 * Matching is done against `target.textContent`, so hidden elements are NOT
 * taken into account.
 *
 * When a match covers exactly one whole Text node, the Range is widened to the
 * outermost enclosing Text Level Element that contains nothing but that node
 * (e.g. the whole `<b>` in `<p><b>foo</b></p>`). The widening never reaches
 * `target` itself or any of its ancestors, so the Range always stays inside
 * the searched subtree. This also avoids calling `Range.selectNode` on a
 * detached `target`, which throws because the node has no parent.
 *
 * @param {DocumentFragment | Element} target - Root of the subtree to search.
 * @param {string} text - Text to look for; an empty string yields nothing.
 * @throws {Error} - Propagated from getNodeRange when target contains a
 *   Comment or ProcessingInstruction.
 * @returns {Generator<Range>}
 */
function* searchText(target, text) {
  const textLength = text.length;
  if (textLength === 0) {
    return;
  }

  const offsets = searchTextOffsets(target.textContent, text);
  for (const { start, end } of getNodeRange(target, offsets, textLength)) {
    const [startText, startOffset] = start;
    const [endText, endOffset] = end;
    const range = new Range();

    // Does the match cover one Text node from its first to its last character?
    const coversWholeText =
      startText === endText &&
      startOffset === 0 &&
      endOffset === startText.nodeValue.length;

    if (coversWholeText) {
      // Climb while the parent is a Text Level Element whose only child is the
      // node selected so far, but stop before reaching the search root.
      /** @type {Node} */
      let selected = startText;
      let parent = selected.parentNode;
      while (
        parent !== null &&
        parent !== target &&
        parent.nodeType === Node.ELEMENT_NODE &&
        parent.childNodes.length === 1 &&
        TEXT_LEVEL_ELEMENTS.includes(parent.tagName)
      ) {
        selected = parent;
        parent = selected.parentNode;
      }
      range.selectNode(selected);
    } else {
      range.setStart(startText, startOffset);
      range.setEnd(endText, endOffset);
    }

    yield range;
  }
}
/**
 * Returns the start index of every occurrence of `text` in `targetContent`.
 *
 * Occurrences may overlap: the scan resumes one character after each hit, so
 * searching "aa" in "aaaa" gives [0, 1, 2]. Indices are in ascending order.
 *
 * @param {string} targetContent - String to scan.
 * @param {string} text - Substring to look for.
 * @return {number[]} Start indices; empty when `text` is empty or not found.
 */
function searchTextOffsets(targetContent, text) {
  // `indexOf("")` never returns -1 (it clamps to the string length), so an
  // empty needle would make the scan below loop forever.
  if (text.length === 0) {
    return [];
  }

  const offsets = [];
  for (
    let offset = targetContent.indexOf(text);
    offset !== -1;
    offset = targetContent.indexOf(text, offset + 1)
  ) {
    offsets.push(offset);
  }
  return offsets;
}
/**
 * Maps match offsets within `target.textContent` back to boundary points
 * inside the Text nodes of `target`.
 *
 * A match that spans several Text nodes is discarded when, after it has
 * started and before it has ended, an element is visited that is not a Text
 * Level Element, or that is a Void Element (even a text-level one such as BR).
 *
 * Overlapping matches are supported: every match whose start has been seen
 * but whose end lies in a later Text node is kept in a queue, so each one is
 * resolved against the node it really started in.
 *
 * NOTE(review): `createNodeIterator` is defined outside this snippet; it is
 * assumed to yield the nodes selected by `whatToShow` in document order.
 *
 * @param {DocumentFragment | Element} target - Root whose nodes are walked.
 * @param {number[]} textOffsets - Match start offsets in `target.textContent`,
 *   expected in ascending order. The array is only read, never modified.
 * @param {number} textLength - Length of the searched text.
 * @throws {Error} - When a Comment or ProcessingInstruction is visited while
 *   some match is still unresolved.
 * @returns {Generator<{ start: [Text, number], end: [Text, number] }>}
 */
function* getNodeRange(target, textOffsets, textLength) {
  const nodes = createNodeIterator(target, {
    whatToShow:
      NodeFilter.SHOW_TEXT |
      NodeFilter.SHOW_ELEMENT |
      NodeFilter.SHOW_COMMENT |
      NodeFilter.SHOW_PROCESSING_INSTRUCTION,
  });

  // Index of the next entry of textOffsets whose start node is still unknown.
  let nextIndex = 0;
  // Offset within textContent at which the current Text node begins.
  let contentOffset = 0;
  /**
   * Matches that have started but not ended yet, oldest first. Because the
   * offsets ascend and all matches share one length, they also end in order.
   * @type {{ offset: number, start: [Text, number] }[]}
   */
  const open = [];

  for (const node of nodes) {
    // Every match has been yielded or discarded: nothing left to resolve.
    if (nextIndex >= textOffsets.length && open.length === 0) {
      return;
    }

    switch (node.nodeType) {
      case Node.TEXT_NODE: {
        const nodeEnd = contentOffset + node.nodeValue.length;
        for (;;) {
          const oldest = open[0];
          if (oldest !== undefined && oldest.offset + textLength <= nodeEnd) {
            // The oldest open match ends inside this node (or exactly at its end).
            open.shift();
            yield {
              start: oldest.start,
              end: [node, oldest.offset + textLength - contentOffset],
            };
          } else if (
            nextIndex < textOffsets.length &&
            textOffsets[nextIndex] < nodeEnd
          ) {
            // The next match starts inside this node; remember where.
            const offset = textOffsets[nextIndex];
            nextIndex += 1;
            open.push({ offset, start: [node, offset - contentOffset] });
          } else {
            // Remaining matches start or end in later nodes.
            break;
          }
        }
        contentOffset = nodeEnd;
        break;
      }
      case Node.ELEMENT_NODE:
        // An element that is not text-level, or that is void, interrupts
        // every match currently in progress.
        if (
          open.length > 0 &&
          (!TEXT_LEVEL_ELEMENTS.includes(node.tagName) ||
            VOID_ELEMENTS.includes(node.tagName))
        ) {
          open.length = 0;
        }
        break;
      case Node.COMMENT_NODE:
      case Node.PROCESSING_INSTRUCTION_NODE:
        throw new Error(
          `target includes unsupported Node name: ${node.nodeName}, value: ${node.nodeValue}`,
        );
    }
  }
}
@petamoriken
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment