Skip to content

Instantly share code, notes, and snippets.

@vsemozhetbyt
Last active February 24, 2016 03:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsemozhetbyt/bfa76deac0d374b6b276 to your computer and use it in GitHub Desktop.
Save vsemozhetbyt/bfa76deac0d374b6b276 to your computer and use it in GitHub Desktop.
/******************************************************************************/
'use strict';
const fs = require('fs');
const pth = require('path');
const readline = require('readline');
// --- NW.js window and the page's own document ------------------------------
// nwWin is used for the window title and for closing the application;
// nwDoc is the document of the application page (not of the crawled iframe).
const nwWin = nw.Window.get();
const nwDoc = window.document;
// --- UI elements, looked up by id -------------------------------------------
// File inputs for the TOC file and the output directory.
const eTocFile = nwDoc.querySelector('#tocFile');
const eOutputDir = nwDoc.querySelector('#outputDir');
// Button that toggles between "Save" and "Stop".
const eSave = nwDoc.querySelector('#save');
// Status/log area; updateInfo() keeps only its last lines.
const eInfo = nwDoc.querySelector('#info');
// Sound played as a notification (errors, hourly statistics, end of saving).
const eAudio = nwDoc.querySelector('#audio');
// Element whose `src` is set to each URL and whose contentWindow is scraped.
const eBrowser = nwDoc.querySelector('#browser');
// Settings persisted to config.json (tocFile, outputDir).
const config = {};
// Current run state: selected paths plus the descriptors of the opened
// dictionary, log and error files.
const io = {};
// Matches every position that is followed by a whole number of 3-digit groups
// up to the end of the string; replaced with ' ' to group thousands.
const formatNumberRE = /\B(?=(?:\d{3})+$)/g;
// One hour in milliseconds (period of the speed statistics).
const hour = 1000 * 60 * 60;
// Queue of URLs still to be processed; filled from the TOC file.
const toc = [];
// The page is treated as ready once any of these selectors matches.
const selectorsToCheck = ['#content-text #newsletter a[href]',
'#content-text .index-words a[href]'];
// Elements removed from the page before conversion.
const selectorsToDelete = ['#content-text #social', '#content-text #newsletter'];
// Element whose text becomes the card body.
const selectorsToSave = ['#content-text'];
// Polling period, in milliseconds, used while waiting for a page to be ready.
const checkFreq = 100;
// Headwords collected for the card currently being built.
const headwordsBuffer = new Set();
// Previously and currently requested URLs; compared in logError() so that a
// repeated request of the same URL is not logged again.
let prevURL = '';
let currURL = '';
// Queue length at the previous hourly measurement.
let restMark;
// Last computed speed/ETA message; placeholder until the first measurement.
let speedInfo = '?/h (?/min): ~? hours left, ~? days left.';
// True while a saving run is in progress.
let saving = false;
// Set to request that the run stops before the next URL is requested.
let stop = false;
// Set when the window must be closed as soon as the run has ended.
let exit = false;
/******************************************************************************/
// Route window-close requests through onExit() so a running save can be
// confirmed and stopped first.
nwWin.on('close', onExit);
// Best effort: restore the previously saved settings. Any failure here
// (config.json missing or unparsable, config.tocFile absent so that
// pth.dirname() throws) is ignored on purpose and the UI starts empty.
// NOTE(review): 'nwworkingdir' is presumably the NW.js attribute that sets the
// initial directory of the file dialog — confirm against the NW.js docs.
try {
Object.assign(config, JSON.parse( fs.readFileSync('config.json', 'utf8') ));
eTocFile.setAttribute('nwworkingdir', pth.dirname(config.tocFile));
eOutputDir.setAttribute('nwworkingdir', config.outputDir);
} catch(e) {}
// Re-validate the selection whenever either input changes.
eTocFile.addEventListener('change', checkDirs);
eOutputDir.addEventListener('change', checkDirs);
eSave.addEventListener('click', saveDic);
// Initial validation: sets the hint text and the state of the Save button.
checkDirs();
/******************************************************************************/
/**
 * Mirrors the two file inputs into `io` and `config` (and into the inputs'
 * tooltips), then enables the Save button only when both are filled in.
 * When both are present the configuration is written to config.json.
 */
function checkDirs() {
  const tocPath = eTocFile.value;
  eTocFile.title = tocPath;
  io.tocFile = tocPath;
  config.tocFile = tocPath;

  const outputPath = eOutputDir.value;
  eOutputDir.title = outputPath;
  io.outputDir = outputPath;
  config.outputDir = outputPath;

  const ready = Boolean(io.tocFile && io.outputDir);
  eInfo.textContent = ready
    ? ''
    : 'Select the TOC file and the output directory please.';
  eSave.disabled = !ready;
  if (ready) {
    fs.writeFileSync('config.json', JSON.stringify(config), 'utf8');
  }
}
/******************************************************************************/
/**
 * Click handler installed on the Save/Stop button while a run is in progress.
 * It only raises the `stop` flag; getDoc() checks the flag before requesting
 * the next URL and ends the run there.
 */
function onStop() {
stop = true;
}
/******************************************************************************/
/**
 * Handler for the window 'close' event.
 * With no run in progress the window is force-closed at once. During a run
 * the user is asked to confirm; on confirmation the `stop` and `exit` flags
 * are raised and the window is closed later, when the run has ended.
 */
function onExit() {
  if (!saving) {
    nwWin.close(true);
    return;
  }

  const confirmed = confirm('Do you want to exit? Dictionary saving will be suspended.');
  if (confirmed) {
    exit = true;
    stop = true;
  }
}
/******************************************************************************/
/**
 * Hourly timer callback: recomputes the processing speed and the estimated
 * time left, stores the message in `speedInfo` and plays the notification
 * sound.
 *
 * The speed is the number of URLs removed from `toc` since the previous
 * measurement (`restMark`). When nothing was processed during the hour no
 * estimate can be made, so "?" is shown instead of dividing by zero (which
 * used to print "~Infinity" or "~NaN").
 */
function setSpeedInfo() {
  const donePerHour = restMark - toc.length;
  const donePerMin = Math.round(donePerHour / 60);
  restMark = toc.length;

  const canEstimate = donePerHour > 0;
  const hoursLeft = canEstimate ? Math.round(restMark / donePerHour) : '?';
  // Days are derived from the already rounded hours, as before.
  const daysLeft = canEstimate ? Math.round(hoursLeft / 24) : '?';

  speedInfo = `${donePerHour}/h (${donePerMin}/min): ~${hoursLeft} hours left, ~${daysLeft} days left.`;
  eAudio.play();
}
/******************************************************************************/
/**
 * Appends one line to the status area. Once the area holds eleven or more
 * lines it is cut down to the last ten and scrolled to the bottom.
 *
 * @param {string} str - Text of the line (the newline is added here).
 */
function updateInfo(str) {
  const withNewLine = eInfo.textContent + `${str}\n`;
  eInfo.textContent = withNewLine;

  const overflows = /(?:.*\n){11}/.test(eInfo.textContent);
  if (!overflows) {
    return;
  }

  // Greedy prefix + ten complete lines: only the trailing ten lines survive.
  const lastTenLines = eInfo.textContent.replace(/[^]+((?:^.*\n){10})/m, '$1');
  eInfo.textContent = lastTenLines;
  eInfo.scrollTop = eInfo.scrollHeight;
}
/******************************************************************************/
/**
 * 'error' handler of the browsing element. Appends a record (timestamp,
 * current URL, serialized event) to the error log unless the current URL is
 * the same as the previously requested one, and always plays the
 * notification sound.
 *
 * @param {Event} evt - The error event; stored via JSON.stringify().
 */
function logError(evt) {
  const alreadyReported = currURL === prevURL;
  if (!alreadyReported) {
    const record = `Iframe error (${new Date()}).\n${currURL}\n${JSON.stringify(evt)}\n`;
    fs.writeSync(io.errFile, record, null, 'utf8');
  }
  eAudio.play();
}
/******************************************************************************/
/**
 * Escapes with a backslash every character from the set
 * \ [ ] { } @ ^ ~ < > # ( ) so that it is written to the dictionary
 * source as literal text.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text with each listed character prefixed by a backslash.
 */
function secureLow(str) {
  return str.replace(/[\\\[\]{}@^~<>#()]/g, special => `\\${special}`);
}
/******************************************************************************/
/**
 * Normalizes whitespace for the dictionary source.
 *
 * Always: trims the text and collapses runs of two or more spaces/tabs into
 * one space. For body text (isHeadword falsy) additionally: trims every
 * line, prefixes each continuation line with a tab, and replaces any run of
 * empty lines by a single line holding an escaped space.
 *
 * @param {string} str - Text to normalize.
 * @param {boolean} [isHeadword] - True to apply only the headword rules.
 * @returns {string} Normalized text.
 */
function secureHigh(str, isHeadword) {
  const collapsed = str.trim().replace(/[ \t]{2,}/g, ' ');
  if (isHeadword) {
    return collapsed;
  }

  const lineTrimmed = collapsed.replace(/^[ \t]+|[ \t]+$/gm, '');
  const indented = lineTrimmed.replace(/\n/g, '\n\t');
  return indented.replace(/(?:\n\t){2,}/g, '\n\t\\ \n\t');
}
/******************************************************************************/
/**
 * Click handler of the Save button: starts (or resumes) a saving run.
 *
 * Steps:
 *  1. switch the UI into "running" state (the button becomes "Stop");
 *  2. check the TOC file and open the dictionary, progress-log and error-log
 *     files in the output directory (a new dictionary gets its DSL header);
 *  3. read the TOC file into the `toc` queue, one non-empty line per URL;
 *  4. if the progress log is not empty, skip every URL up to and including
 *     the last logged one;
 *  5. start the hourly speed timer and request the first URL.
 *
 * Any failure in step 2 is reported in the status area and the run is ended
 * through endSaving(), so the UI returns to its idle state instead of
 * continuing with unusable files.
 */
function saveDic() {
  saving = true;
  eSave.removeEventListener('click', saveDic);
  eSave.textContent = 'Stop';
  eSave.addEventListener('click', onStop);
  eTocFile.disabled = true;
  eOutputDir.disabled = true;

  // A stopped run leaves its unprocessed URLs in the queue; start clean so
  // re-reading the TOC file cannot produce duplicates.
  toc.length = 0;

  try {
    fs.accessSync(io.tocFile);
  } catch (e) {
    eInfo.textContent = `TOC file not available: '${e}.'`;
    endSaving(); // restores the UI and plays the notification sound
    return;
  }

  try {
    io.dicFile = fs.openSync(pth.join(io.outputDir, 'WordSpy.dic.dsl'), 'a');
    io.logFile = fs.openSync(pth.join(io.outputDir, 'WordSpy.dic.log'), 'a+');
    io.errFile = fs.openSync(pth.join(io.outputDir, 'WordSpy.dic.errors.log'), 'a');

    if (fs.fstatSync(io.dicFile).size === 0) {
      // New dictionary: byte order mark followed by the DSL header lines.
      const header = [
        '#NAME "Word Spy 2016 (Eng-Eng)"',
        '#INDEX_LANGUAGE "English"',
        '#CONTENTS_LANGUAGE "English"',
        '',
      ].join('\n');
      fs.writeSync(io.dicFile, `\uFEFF${header}`, null, 'utf8');
    }
  } catch (e) {
    eInfo.textContent = `Output files not available: '${e}.'`;
    endSaving(); // also closes whichever files were opened before the failure
    return;
  }

  // Sets the window/document title and logs the same text.
  const announce = text => {
    nwWin.title = text;
    nwDoc.title = text;
    updateInfo(text);
  };

  // Starts the crawl once the queue is final.
  const startCrawl = () => {
    if (!toc.length) {
      // Everything listed in the TOC file is already in the progress log.
      announce('Saving complete.');
      endSaving();
      return;
    }
    restMark = toc.length;
    // Only one statistics timer may run; drop the one left by an earlier run.
    if (saveDic.speedTimer) {
      global.clearInterval(saveDic.speedTimer);
    }
    saveDic.speedTimer = global.setInterval(setSpeedInfo, hour);
    saveDic.speedTimer.unref();
    announce(`Rest: ${ toc.length.toString().replace(formatNumberRE, ' ') }.`);
    getDoc(toc.shift());
  };

  // Resumes after the last URL recorded in the progress log.
  const resumeFromLog = () => {
    const logReader = readline.createInterface({
      input: fs.createReadStream(null, {encoding: 'utf8', fd: io.logFile, autoClose: false}),
      terminal: false,
      historySize: 0
    });
    updateInfo('Reading the log file...');
    let lastLine;
    logReader.on('line', line => {
      const trimmed = line.trim();
      if (trimmed) lastLine = trimmed;
    }).on('close', () => {
      // indexOf() gives -1 for an unknown URL, which removes nothing.
      toc.splice(0, toc.indexOf(lastLine) + 1);
      startCrawl();
    });
  };

  eInfo.textContent = 'Reading the TOC file...\n';
  const tocReader = readline.createInterface({
    input: fs.createReadStream(io.tocFile, 'utf8'),
    terminal: false,
    historySize: 0
  });
  tocReader.on('line', line => {
    const trimmed = line.trim();
    if (trimmed) toc.push(trimmed);
  }).on('close', () => {
    if (!toc.length) {
      eInfo.textContent = 'No URLs found.';
      endSaving();
      return;
    }
    eBrowser.addEventListener('load', checkDoc);
    eBrowser.addEventListener('error', logError);
    if (fs.fstatSync(io.logFile).size !== 0) {
      resumeFromLog();
    } else {
      startCrawl();
    }
  });
}
/******************************************************************************/
/**
 * Requests the next page, unless a stop was requested.
 *
 * When `stop` is set, the notice is shown (window title, document title and
 * status area) and the run is ended. Otherwise the URL bookkeeping is
 * shifted (current becomes previous), the URL is logged to the status area
 * and assigned to the browsing element's `src`.
 *
 * @param {string} url - Address of the page to load.
 */
function getDoc(url) {
  if (!stop) {
    prevURL = currURL;
    currURL = url;
    updateInfo(` ${url}`);
    eBrowser.src = url;
    return;
  }

  const notice = 'Stop on demand.';
  nwWin.title = notice;
  nwDoc.title = notice;
  updateInfo(notice);
  endSaving();
}
/******************************************************************************/
/**
 * 'load' handler of the browsing element. Polls the loaded document every
 * `checkFreq` milliseconds until one of `selectorsToCheck` matches, then
 * hands the page to processDoc() together with the number of polls made.
 *
 * After more than 50 unsuccessful polls it gives up:
 *  - if the loaded address still equals the requested URL, the failure is
 *    reported as an HTTP error and the same URL is requested again;
 *  - otherwise a record is written to the error log and the run is ended.
 */
function checkDoc() {
  const iWin = eBrowser.contentWindow;
  const iDoc = iWin.document;
  const iLoc = iWin.location.href;
  let iter = 0;

  // Shows the same text in both titles and in the status area.
  const announce = text => {
    nwWin.title = text;
    nwDoc.title = text;
    updateInfo(text);
  };

  const poll = () => {
    iter += 1;

    const marker = iDoc.querySelector(selectorsToCheck.join(', '));
    if (marker) {
      global.clearInterval(checker);
      processDoc(iWin, iDoc, iLoc, iter);
      return;
    }
    if (iter <= 50) {
      return; // keep waiting
    }

    global.clearInterval(checker);
    if (iLoc !== currURL) {
      const record = `Something wrong (${new Date()}).\n${currURL}\n`;
      fs.writeSync(io.errFile, record, null, 'utf8');
      announce('Something wrong...');
      endSaving();
      return;
    }
    eAudio.play();
    announce('HTTP error. Retrying...');
    getDoc(currURL);
  };

  const checker = global.setInterval(poll, checkFreq);
}
/******************************************************************************/
/**
 * Converts the page loaded in the browsing element into one DSL card,
 * appends the card to the dictionary file, records the page address in the
 * progress log and moves on to the next URL (or ends the run).
 *
 * The conversion works on the live DOM: DSL tags and line-break markers are
 * inserted into the element to be saved and its `innerText` is then taken
 * as the card body. The order of the passes below matters, because later
 * passes see the text inserted by earlier ones.
 *
 * @param {Window} iWin - Window of the loaded page (for computed styles).
 * @param {Document} iDoc - Document of the loaded page.
 * @param {string} iLoc - Address of the loaded page.
 * @param {number} iter - Number of readiness polls made; used for the
 *   "seconds spent" figure in the status area.
 */
function processDoc(iWin, iDoc, iLoc, iter) {
  selectorsToDelete.forEach(selector => {
    const unwanted = iDoc.querySelector(selector);
    if (unwanted) unwanted.parentNode.removeChild(unwanted);
  });

  const elmToSave = iDoc.querySelector(selectorsToSave.join(', '));

  /* ---------------------------- local helpers ---------------------------- */

  // Static list of the matching descendants of the saved element.
  const selectAll = selector => Array.from(elmToSave.querySelectorAll(selector));

  // Puts `open` at the start and `close` at the end of the element's content.
  const wrap = (elm, open, close) => {
    elm.insertAdjacentHTML('afterbegin', open);
    elm.insertAdjacentHTML('beforeend', close);
  };

  // wrap() for every element matching the selector.
  const wrapAll = (selector, open, close) => {
    selectAll(selector).forEach(elm => { wrap(elm, open, close); });
  };

  // Inserts HTML right after every element matching the selector.
  const insertAfterAll = (selector, html) => {
    selectAll(selector).forEach(elm => { elm.insertAdjacentHTML('afterend', html); });
  };

  // True when the element itself or one of its ancestors carries `attr`.
  // Including the element itself keeps an element tagged by selector from
  // being tagged a second time by the computed-style pass.
  const isMarked = (elm, attr) => Boolean(
    iDoc.evaluate(`./ancestor-or-self::*[@${attr}]`, elm,
      null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue
  );

  // Wraps each element in open/close unless it is already inside such a tag.
  const wrapOnce = (elements, attr, open, close) => {
    elements.forEach(elm => {
      if (isMarked(elm, attr)) return;
      wrap(elm, open, close);
      elm.setAttribute(attr, '');
    });
  };

  // Items that belong to this list only (not to lists nested in it).
  const ownItems = list => Array.from(list.children).filter(child => child.tagName === 'LI');

  // Makes plain text safe for insertAdjacentHTML (e.g. "&copy=" in a URL
  // would otherwise be decoded as a character reference).
  const htmlText = text => text
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  // Computed font-weight may be reported as the keyword or as a number.
  const isBold = elm => {
    const weight = iWin.getComputedStyle(elm).fontWeight;
    return weight === 'bold' || Number.parseInt(weight, 10) >= 700;
  };

  /* ------------------- escape DSL control characters -------------------- */

  const textNodes = iDoc.evaluate(
    './/text()', elmToSave, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null
  );
  for (let i = 0; i < textNodes.snapshotLength; i++) {
    const node = textNodes.snapshotItem(i);
    node.textContent = secureLow(node.textContent);
  }

  /* ------------------------------ headwords ------------------------------ */

  // The header lies outside the saved element, so it is escaped separately.
  const headerText = secureHigh(secureLow(iDoc.querySelector('#content-header').innerText), true);

  if (iLoc.startsWith('http://www.wordspy.com/index.php?word=')) {
    headwordsBuffer.add(headerText);

    const synonyms = elmToSave.querySelector('#synonyms .word-meta-data');
    if (synonyms) {
      synonyms.innerText.split('·')
        .forEach(synonym => { headwordsBuffer.add(secureHigh(synonym, true)); });
    }

    const inflections = elmToSave.querySelector('#inflections .word-meta-data');
    if (inflections) {
      // Hidden elements are left out of innerText, which keeps the
      // italicized abbreviations out of the headwords.
      const abbreviations = selectAll('#inflections .italicized');
      abbreviations.forEach(abbrv => { abbrv.style.visibility = 'hidden'; });
      inflections.innerText.split('·')
        .forEach(form => {
          headwordsBuffer.add(secureHigh(form.trim().replace(/\s+\.$/, ''), true));
        });
      abbreviations.forEach(abbrv => { abbrv.style.visibility = 'visible'; });
    }
  } else {
    // Any other page is stored under "\# <header>".
    headwordsBuffer.add(`\\# ${headerText}`);
  }

  /* --------------------------- block structure --------------------------- */

  selectAll('blockquote, div, h3, ol, p, table, ul').forEach(elm => {
    elm.insertAdjacentHTML('beforebegin', '<br><br>');
    elm.insertAdjacentHTML('afterend', '<br><br>');
  });
  insertAfterAll('hr', '<br><br>----------<br><br>');
  insertAfterAll('iframe', '<br><br>\\[Embeded video or page. See on the site.\\]<br><br>');

  selectAll('blockquote').forEach(elm => {
    const last = elm.lastChild; // null for an empty blockquote
    if (last && last.tagName === 'BR') elm.removeChild(last);
    wrap(elm, '“', '”');
  });

  // Only the list's own items are prefixed, so nested lists are not
  // bulleted/numbered twice.
  selectAll('ul').forEach(list => {
    ownItems(list).forEach(li => { li.insertAdjacentHTML('afterbegin', '• '); });
  });
  selectAll('ol').forEach(list => {
    ownItems(list).forEach((li, i) => { li.insertAdjacentHTML('afterbegin', `${i + 1}. `); });
  });

  /* ---------------------------- inline markup ---------------------------- */

  selectAll('smirk, flame').forEach(elm => {
    const tag = elm.tagName.toLowerCase();
    wrap(elm, `\\&lt;${tag}\\&gt;`, `\\&lt;/${tag}\\&gt;`);
  });
  selectAll('span.add-separator').forEach(elm => {
    elm.insertAdjacentHTML('afterbegin', ' · ');
  });

  wrapAll('h3', '[b][c steelblue]', '[/c][/b]');
  wrapAll('div.word-citation-year', '[b][c gray]', '[/c][/b]');

  wrapOnce(selectAll('i, em, span.italicized'), 'data-dsl-i', '[i]', '[/i]');
  wrapOnce(
    selectAll('span').filter(elm => iWin.getComputedStyle(elm).fontStyle !== 'normal'),
    'data-dsl-i', '[i]', '[/i]'
  );
  wrapOnce(selectAll('b, span.headword'), 'data-dsl-b', '[b]', '[/b]');
  wrapOnce(selectAll('span').filter(isBold), 'data-dsl-b', '[b]', '[/b]');

  wrapAll('sup', '[sup]', '[/sup]');

  /* --------------------------- links and images -------------------------- */

  const stripScheme = text => text.replace(/http:\/\/(?:www\.)?/, '');
  selectAll('a[href]').forEach(elm => {
    if (stripScheme(elm.href) === stripScheme(elm.innerText.trim())) {
      // The link text is the address itself.
      wrap(elm, '[url]', '[/url]');
    } else if (/http:\/\/www\.wordspy\.com\/index\.php\?word=/.test(elm.href)) {
      wrap(elm, '[ref]', '[/ref]');
    } else if (/http:\/\/www\.wordspy\.com\/index\.php\?tag=/.test(elm.href)) {
      wrap(elm, '[ref]\\# ', '[/ref]');
    } else {
      if (elm.innerText.trim()) {
        wrap(elm, '[u]', '[/u]');
      }
      elm.insertAdjacentHTML('beforeend', ` ([url]${htmlText(secureLow(elm.href))}[/url])`);
    }
  });

  selectAll('img').forEach(elm => {
    elm.insertAdjacentHTML('afterend',
      `<br><br>[url]${htmlText(secureLow(elm.src))}[/url]<br><br>`);
  });

  wrapAll('div.word-citation', '[m2]', '[/m]');

  /* ----------------------------- write card ------------------------------ */

  const headwords = Array.from(headwordsBuffer).join('\n');
  const body = secureHigh(elmToSave.innerText);
  // secureHigh() starts every continuation line with a tab; the first body
  // line needs the same indent or it would be read as another headword.
  fs.writeSync(io.dicFile, `${headwords}\n\t${body}\n`, null, 'utf8');
  fs.writeSync(io.logFile, `${iLoc}\n`, null, 'utf8');

  updateInfo( ` ${(iter * checkFreq / 1000).toFixed(1)} s. ${speedInfo}` );
  headwordsBuffer.clear();

  if (toc.length) {
    updateInfo(
      nwDoc.title = nwWin.title =
        `Rest: ${ toc.length.toString().replace(formatNumberRE, ' ') }.`);
    getDoc(toc.shift());
  } else {
    updateInfo( nwDoc.title = nwWin.title = 'Saving complete.' );
    endSaving();
  }
}
/******************************************************************************/
/**
 * Ends a saving run and returns the UI to its idle state: closes whichever
 * output files are open, detaches the page handlers, turns the button back
 * into "Save", re-enables the inputs, blanks the browsing element and plays
 * the notification sound. If an exit was requested meanwhile, the window is
 * force-closed at the end.
 */
function endSaving() {
  for (const key of ['dicFile', 'logFile', 'errFile']) {
    if (io[key]) {
      fs.closeSync(io[key]);
    }
  }
  io.dicFile = null;
  io.logFile = null;
  io.errFile = null;

  eBrowser.removeEventListener('load', checkDoc);
  eBrowser.removeEventListener('error', logError);

  eSave.removeEventListener('click', onStop);
  eSave.textContent = 'Save';
  eSave.addEventListener('click', saveDic);

  eTocFile.disabled = false;
  eOutputDir.disabled = false;
  saving = false;

  eBrowser.src = 'about:blank';
  eAudio.play();

  if (!exit) {
    return;
  }
  nwWin.close(true);
}
/******************************************************************************/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment