Skip to content

Instantly share code, notes, and snippets.

@sorrycc
Created November 18, 2022 13:30
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save sorrycc/9d48bbc767e212baeee3c24e3a8f793f to your computer and use it in GitHub Desktop.
Save sorrycc/9d48bbc767e212baeee3c24e3a8f793f to your computer and use it in GitHub Desktop.
/**
 * Heuristically pick the element that holds the bulk of the readable text.
 *
 * Starts from the <p> (or, when there are none, <div>) with the most
 * whitespace-delimited words, climbs ancestors until the selection covers
 * at least 40% of the words under `root`, then climbs out of any
 * p/blockquote/video/figure so the result is a wrapper element.
 *
 * @param {Element} [root] - Subtree to search; falls back to `document.body`.
 * @returns {Element|null} The chosen container, or null when `root` has no
 *   words, no candidate paragraph has words, or the climb runs out of parents.
 */
function getContainer(root) {
  // `String.prototype.match` returns null when nothing matches, so
  // whitespace-only (or missing) text must count as zero words, not throw.
  const countWords = (text) => (text ? (text.match(/\S+/g) || []).length : 0);
  const leafTags = ['p', 'blockquote', 'video', 'figure'];

  const scope = root || document.body;
  const totalWords = countWords(scope.innerText);
  if (totalWords === 0) return null;

  let ps = scope.querySelectorAll('p');
  if (!ps.length) ps = scope.querySelectorAll('div');
  if (!ps.length) return null;

  // Pick the candidate with the most words; the first one wins ties.
  let container = null;
  let maxWords = 0;
  for (const p of ps) {
    const numWords = countWords(p.innerText);
    if (numWords > maxWords) {
      maxWords = numWords;
      container = p;
    }
  }
  // Every candidate was empty: there is nothing to climb from.
  if (!container) return null;

  // Widen the selection until it covers at least 40% of the words in scope.
  let selectedWords = maxWords;
  while (
    selectedWords / totalWords < 0.4 &&
    container !== scope &&
    container.parentElement &&
    container.parentElement.innerText
  ) {
    container = container.parentElement;
    selectedWords = countWords(container.innerText);
  }

  // Never return a leaf-like element; stop safely if the parents run out.
  while (container && leafTags.includes(container.tagName.toLowerCase())) {
    container = container.parentElement;
  }
  return container;
}
/**
 * Gather the heading, list-item and paragraph elements under `container`.
 *
 * Results are grouped by tag in the order h1..h6, li, p (one
 * `querySelectorAll` call per tag), not in overall document order.
 *
 * @param {Element} container - Element whose descendants are searched.
 * @returns {Element[]} Matching elements, grouped by tag.
 */
function getNodes(container) {
  const selectors = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'li', 'p'];
  return selectors.flatMap((selector) => [...container.querySelectorAll(selector)]);
}
/**
 * Deep-clone each node and insert the clone directly after its original.
 *
 * @param {Iterable<Node>} nodes - Nodes to duplicate; each needs a parent.
 * @returns {Node[]} The inserted clones, in the same order as `nodes`.
 */
function copyNodes(nodes) {
  const clones = [];
  for (const original of nodes) {
    const clone = original.cloneNode(true);
    const follower = original.nextSibling;
    const parent = original.parentNode;
    if (follower) {
      parent.insertBefore(clone, follower);
    } else {
      // The original is the last child, so the clone goes at the end.
      parent.appendChild(clone);
    }
    clones.push(clone);
  }
  return clones;
}
// Endpoint that receives the translation requests.
const API_URL = 'http://localhost:8080/translate';

/**
 * POST `text` to API_URL as JSON and return the `data` field of the reply.
 *
 * The request asks for source language 'auto' and target language 'ZH'.
 *
 * @param {string} text - Text to translate.
 * @returns {Promise<*>} The truthy `data` value from the JSON response.
 * @throws {Error} When the HTTP status is not 200, or `data` is missing/falsy.
 */
async function translate(text) {
  const payload = {
    text,
    source_lang: 'auto',
    target_lang: 'ZH',
  };
  const response = await fetch(API_URL, {
    method: 'POST',
    mode: 'cors',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify(payload),
  });
  const { status, statusText } = response;
  if (status !== 200) {
    throw new Error(`Translate failed, ${status} ${statusText}`);
  }
  const { data } = await response.json();
  if (data) {
    return data;
  }
  throw new Error(`Translate failed, data is null`);
}
/**
 * Group the '\n\n'-separated chunks of `text` into blocks whose joined length
 * stays within `maxBlockLength`.
 *
 * Chunks are never split or reordered, so `result.join('\n\n') === text`.
 * The separators that `join` re-inserts are included in the size accounting;
 * without that a block made of many short chunks could be several times
 * larger than the limit.
 *
 * @param {string} text - Text whose chunks are separated by a blank line.
 * @param {number} [maxBlockLength=4000] - Upper bound for each block's length.
 * @returns {string[]} Blocks, each made of one or more whole chunks.
 * @throws {Error} When a single chunk is `maxBlockLength` characters or longer.
 */
function textToBlocks(text, maxBlockLength = 4000) {
  const SEP = '\n\n';
  const chunks = text.split(SEP);
  const result = [];
  let block = [];
  // Length of `block.join(SEP)` for the block being assembled.
  let blockLength = 0;
  for (const chunk of chunks) {
    const chunkSize = chunk.length;
    if (chunkSize >= maxBlockLength) throw new Error(`chunk is too large`);
    // The first chunk of a block needs no separator in front of it.
    const projected = block.length === 0 ? chunkSize : blockLength + SEP.length + chunkSize;
    if (block.length > 0 && projected > maxBlockLength) {
      result.push(block.join(SEP));
      block = [chunk];
      blockLength = chunkSize;
    } else {
      block.push(chunk);
      blockLength = projected;
    }
  }
  if (block.length) result.push(block.join(SEP));
  return result;
}
/**
 * Return a promise that resolves (with no value) once `setTimeout` fires
 * after `ms` milliseconds.
 */
const delay = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
/**
 * Entry point: find the main text container, collect its block-level nodes
 * (plus the page's first <h1> if it is not already included), translate
 * their text block by block, and insert an underlined translated copy right
 * after each original node.
 *
 * Node texts are joined with a blank line; blank lines inside a node are
 * temporarily encoded as '^^^' so the reply can be split back into one
 * segment per node.
 */
(async () => {
  const container = getContainer();
  console.log('> container', container);
  if (!container) {
    throw new Error('No container found');
  }
  const nodes = [...getNodes(container)];
  if (!nodes.length) {
    throw new Error('No nodes found');
  }
  const titleNode = document.querySelector('h1');
  if (titleNode && !nodes.includes(titleNode)) {
    nodes.unshift(titleNode);
  }
  const textToTranslate = nodes
    .map((node) => node.innerText.replace(/\n\n/g, '^^^'))
    .join('\n\n');
  const blocks = textToBlocks(textToTranslate);
  console.log('blocks', blocks.map((b) => b.length).join(', '));
  let initialIndex = 0;
  let blockIndex = 0;
  for (const block of blocks) {
    console.log(`Translating block ${blockIndex + 1}/${blocks.length}...`);
    const translatedText = await translate(block);
    console.log('Translated, copying nodes...');
    const translatedNodes = translatedText
      .split('\n\n')
      .map((item) => item.replace(/\^\^\^/g, '\n\n'));
    const len = translatedNodes.length;
    // Number of segments this block was built from.
    const expected = block.split('\n\n').length;
    console.log('Check if length is equal', len, nodes.length, initialIndex);
    if (len !== expected) {
      console.warn(`Segment count mismatch: sent ${expected}, received ${len}`);
    }
    // Never index past the segments we sent or past the end of `nodes`,
    // even when the service merges or splits segments.
    const usable = Math.min(len, expected, nodes.length - initialIndex);
    for (let offset = 0; offset < usable; offset++) {
      const [copy] = copyNodes([nodes[initialIndex + offset]]);
      // Build the wrapper with DOM APIs and assign the translation as text:
      // interpolating it into innerHTML would let markup contained in the
      // page text or in the service reply be parsed as HTML.
      const highlight = document.createElement('font');
      highlight.style.cssText = 'border-bottom:2px solid #72ECE9;vertical-align:inherit';
      highlight.textContent = translatedNodes[offset];
      copy.textContent = '';
      copy.appendChild(highlight);
    }
    // Advance by what was sent so later blocks stay aligned with their nodes.
    initialIndex += expected;
    console.log('x initialIndex', initialIndex);
    blockIndex += 1;
    if (blockIndex < blocks.length) {
      // Random 1-4 s pause between requests.
      const delayTime = 1000 + Math.floor(Math.random() * 3000);
      console.log(`delay ${delayTime}ms...`);
      await delay(delayTime);
    }
  }
})().catch((err) => {
  // Report failures explicitly instead of leaving an unhandled rejection.
  console.error('Translation aborted:', err);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment