Skip to content

Instantly share code, notes, and snippets.

@petamoriken petamoriken/searchText.js
Last active Oct 23, 2019

Embed
What would you like to do?
HTML 内の文字列の検索
/**
 * Upper-case tag names of the HTML void elements, kept as a frozen
 * (read-only) array.
 *
 * @see https://html.spec.whatwg.org/multipage/syntax.html#void-elements
 */
const VOID_ELEMENTS = Object.freeze([
  "AREA", "BASE", "BR", "COL", "EMBED", "HR", "IMG",
  "INPUT", "LINK", "META", "PARAM", "SOURCE", "TRACK", "WBR",
]);
/**
 * Upper-case tag names of the HTML text-level semantics elements, kept as a
 * frozen (read-only) array. The order follows the listing order of the
 * referenced specification chapter.
 *
 * @see https://html.spec.whatwg.org/multipage/text-level-semantics.html
 */
const TEXT_LEVEL_ELEMENTS = Object.freeze([
  "A", "EM", "STRONG", "SMALL", "S", "CITE", "Q", "DFN", "ABBR",
  "RUBY", "RT", "RP",
  "DATA", "TIME", "CODE", "VAR", "SAMP", "KBD",
  "SUB", "SUP", "I", "B", "U", "MARK",
  "BDI", "BDO", "SPAN",
  "BR", "WBR",
]);
/**
 * Searches `target` for every occurrence of `text` and lazily yields one
 * Range per occurrence.
 *
 * Occurrences are located in `target.textContent`, so hidden elements are
 * NOT taken into account.
 *
 * When an occurrence covers exactly one whole Text node, the Range is widened
 * to select the outermost ancestor chain of text-level elements that contain
 * nothing but that text (e.g. the whole `<em>` instead of just its text).
 *
 * @param {DocumentFragment | Element} target
 * @param {string} text
 * @throws {Error} - thrown when a Comment or ProcessingInstruction is
 *   encountered inside target while matches are still being resolved
 * @returns {Generator<Range>}
 */
function* searchText(target, text) {
  const textLength = text.length;
  if (textLength === 0) {
    return;
  }

  const offsets = searchTextOffsets(target.textContent, text);
  for (const { start, end } of getNodeRange(target, offsets, textLength)) {
    const [startText, startOffset] = start;
    const [endText, endOffset] = end;
    const range = new Range();

    const coversWholeText =
      startText === endText &&
      startOffset === 0 &&
      endOffset === startText.nodeValue.length;

    if (coversWholeText) {
      /** @type {Node} */
      let selected = startText;
      // Climb while the parent is a text-level element whose only child is
      // the node selected so far. The climb stops below any element that has
      // no parent itself (e.g. a detached search root), because
      // Range#selectNode throws for a node without a parent.
      let parent;
      while (
        (parent = selected.parentNode) !== null &&
        parent.nodeType === Node.ELEMENT_NODE &&
        parent.childNodes.length === 1 &&
        TEXT_LEVEL_ELEMENTS.includes(parent.tagName) &&
        parent.parentNode !== null
      ) {
        selected = parent;
      }
      range.selectNode(selected);
    } else {
      range.setStart(startText, startOffset);
      range.setEnd(endText, endOffset);
    }

    yield range;
  }
}
/**
 * Returns the start index of every occurrence of `text` in `targetContent`.
 *
 * The search resumes one position after each hit, so overlapping occurrences
 * are all reported (e.g. "aa" in "aaaa" gives [0, 1, 2]). The result is in
 * ascending order.
 *
 * @param {string} targetContent
 * @param {string} text
 * @return {number[]} empty when `text` is empty or does not occur
 */
function searchTextOffsets(targetContent, text) {
  const offsets = [];
  // An empty needle "matches" at every position and indexOf clamps the start
  // position to the string length, so the loop below would never terminate.
  if (text.length === 0) {
    return offsets;
  }

  let currentOffset = targetContent.indexOf(text);
  while (currentOffset !== -1) {
    offsets.push(currentOffset);
    currentOffset = targetContent.indexOf(text, currentOffset + 1);
  }
  return offsets;
}
/**
 * Converts start offsets measured in `textContent` into node-level
 * boundaries: for each offset it yields the [Text, offset] pair where the
 * match starts and the pair where it ends.
 *
 * A match that starts in one Text node and ends in a later one is dropped
 * when an element that is not a text-level element, or that is a void element
 * (even a text-level one such as BR), is visited before the match is
 * complete.
 *
 * Several matches may be in progress at once (overlapping matches that span a
 * Text node boundary); they are tracked in a queue so each one keeps its own
 * start boundary. `textOffsets` is only read, never modified.
 *
 * @param {DocumentFragment | Element} target
 * @param {number[]} textOffsets - start offsets in ascending order, as
 *   produced by searchTextOffsets
 * @param {number} textLength - length of the searched text
 * @throws {Error} - thrown when a Comment or ProcessingInstruction is visited
 *   while there are still offsets left to resolve
 * @returns {Generator<{ start: [Text, number], end: [Text, number] }>}
 */
function* getNodeRange(target, textOffsets, textLength) {
  // Index of the next entry of textOffsets that has not been placed yet.
  let nextIndex = 0;
  // Offset in textContent at which the current Text node begins.
  let contentOffset = 0;
  // Matches whose start is known but whose end lies in a later Text node,
  // oldest first. Because all matches have the same length, the oldest one
  // always ends first.
  /** @type {{ start: [Text, number], endOffset: number }[]} */
  const pending = [];

  for (const node of createNodeIterator(target, {
    whatToShow:
      NodeFilter.SHOW_TEXT |
      NodeFilter.SHOW_ELEMENT |
      NodeFilter.SHOW_COMMENT |
      NodeFilter.SHOW_PROCESSING_INSTRUCTION,
  })) {
    // Nothing left to resolve: stop before looking at any further node.
    if (pending.length === 0 && nextIndex >= textOffsets.length) {
      return;
    }

    switch (node.nodeType) {
      case Node.TEXT_NODE: {
        const nodeEnd = contentOffset + node.nodeValue.length;

        // Finish every in-progress match that ends inside this node
        // (ending exactly at the node's end counts).
        while (pending.length > 0 && pending[0].endOffset <= nodeEnd) {
          const { start, endOffset } = pending.shift();
          yield { start, end: [node, endOffset - contentOffset] };
        }

        // Place every match that starts inside this node.
        while (
          nextIndex < textOffsets.length &&
          textOffsets[nextIndex] < nodeEnd
        ) {
          const matchStart = textOffsets[nextIndex];
          nextIndex += 1;
          const matchEnd = matchStart + textLength;
          /** @type {[Text, number]} */
          const start = [node, matchStart - contentOffset];
          if (matchEnd <= nodeEnd) {
            // The match is fully contained in this node.
            yield { start, end: [node, matchEnd - contentOffset] };
          } else {
            // The match runs past this node; its end is resolved later.
            pending.push({ start, endOffset: matchEnd });
          }
        }

        contentOffset = nodeEnd;
        break;
      }
      case Node.ELEMENT_NODE:
        // A non-text-level element, or a void element, in the middle of a
        // match invalidates every match currently in progress.
        if (
          pending.length > 0 &&
          (!TEXT_LEVEL_ELEMENTS.includes(node.tagName) ||
            VOID_ELEMENTS.includes(node.tagName))
        ) {
          pending.length = 0;
        }
        break;
      case Node.COMMENT_NODE:
      case Node.PROCESSING_INSTRUCTION_NODE:
        throw new Error(
          `target includes unsupported Node name: ${node.nodeName}, value: ${node.nodeValue}`,
        );
    }
  }
}
@petamoriken

This comment has been minimized.

Copy link
Owner Author

petamoriken commented Oct 21, 2019

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.