Last active
August 14, 2020 09:29
-
-
Save ficapy/3dae55df3b55346d8a1744cd1787ef20 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Accumulator for the text fragments collected during the DOM walk;
// check() pushes into it and the final console.log prints its contents.
const a = [];
/**
 * Ensures the text of `node` and of its descendants is separated by spaces
 * inside `raw`.
 *
 * When several tags are merged into one string, this guarantees at minimum
 * that a merged piece has a space on its left and right. For `node` itself and
 * for every descendant that has child nodes, the trimmed `innerText` is looked
 * up in the working string; if it occurs with neither a leading nor a trailing
 * space, its first occurrence is wrapped in single spaces.
 *
 * Note: the root `node` is processed as well, so when `raw` is exactly the
 * root's trimmed text the returned string is padded on both ends.
 *
 * @param {object} node - Root of the subtree; must expose `innerText` and `childNodes`.
 * @param {string} raw - Text to adjust.
 * @returns {string} `raw` with the padding applied.
 */
function _add_empty(node, raw) {
  // Work on a local copy instead of reassigning the parameter.
  let result = raw;

  function inner(current) {
    const innertext = current.innerText;
    if (!innertext) {
      // No text here: nothing to pad, and the subtree is not descended into.
      return;
    }
    const content = innertext.trim();
    if (!result.includes(` ${content}`) && !result.includes(`${content} `)) {
      // A function replacer inserts the text literally; a plain replacement
      // string would expand `$$`, `$&`, `$'` and "$`" found in the page text.
      result = result.replace(content, () => ` ${content} `);
    }
    // Array.from accepts both a NodeList and any array-like of children.
    for (const child of Array.from(current.childNodes)) {
      // Only descend into children that themselves have child nodes.
      if (child && child.childNodes.length > 0) {
        inner(child);
      }
    }
  }

  inner(node);
  return result;
}
/**
 * Decides whether the walk should stop at `node`, collecting its text into
 * the outer array `a` when it qualifies.
 *
 * Returning true means the node is not split any further:
 *  - the node is hidden, or both its width and height are under 10;
 *  - the node has no `innerText`;
 *  - the node has no child nodes (its trimmed text is collected if it has
 *    more than three space-separated parts);
 *  - the trimmed text contains no newline (the text, passed through
 *    `_add_empty`, is collected under the same more-than-three-parts rule).
 * Returning false means the trimmed text contains a newline and the caller
 * should examine the children instead.
 *
 * @param {object} node - Must expose `getBoundingClientRect()`, `hidden`,
 *   `innerText` and `childNodes`.
 * @returns {boolean} true to stop descending, false to keep splitting.
 */
function check(node) {
  // Declared locally: a bare `rect = ...` creates an implicit global and
  // throws a ReferenceError in strict mode / ES modules.
  const rect = node.getBoundingClientRect();
  if (node.hidden || (rect.width < 10 && rect.height < 10)) {
    return true;
  }

  const innertext = node.innerText;
  if (!innertext) {
    return true;
  }

  const content = innertext.trim();
  // Same threshold for both collecting branches: more than three parts when
  // splitting on a single space.
  const hasEnoughWords = content.length > 0 && content.split(' ').length > 3;

  if (node.childNodes.length === 0) {
    if (hasEnoughWords) {
      a.push(content);
    }
    return true;
  }

  if (content.includes('\n')) {
    // Multi-line text: let the caller split it by visiting the children.
    return false;
  }

  if (hasEnoughWords) {
    a.push(_add_empty(node, content));
  }
  return true;
}
/**
 * Walks the tree rooted at `node` depth-first, calling `check` on each
 * visited node and descending into its children only when `check` returns
 * false.
 *
 * @param {object|null|undefined} node - Node to start from. A missing node
 *   (e.g. a `querySelector` call that matched nothing) is ignored.
 * @returns {void}
 */
function loop(node) {
  // Guard first so a null/undefined start node never reaches check().
  if (!node || check(node)) {
    return;
  }
  for (const child of Array.from(node.childNodes)) {
    // Skip children without getBoundingClientRect (check() calls it
    // unconditionally).
    if (child && typeof child.getBoundingClientRect === 'function') {
      loop(child);
    }
  }
}
// Entry point: walk the element matched by the selector, then print every
// collected fragment, one per line.
const s = 'body';
loop(document.querySelector(s));
console.log(a.join('\n'));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment