Skip to content

Instantly share code, notes, and snippets.

@vsemozhetbyt
Created February 19, 2016 19:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsemozhetbyt/986e37613d921087eb53 to your computer and use it in GitHub Desktop.
Save vsemozhetbyt/986e37613d921087eb53 to your computer and use it in GitHub Desktop.
'use strict';
/******************************************************************************/
console.log('Requiring modules...');
const fs = require('fs');
const path = require('path');
const readline = require('readline');
const execSync = require('child_process').execFileSync;
const urlParser = require('url');
const jsdom = require('jsdom');
// Output directory: first CLI argument, falling back to the script's directory.
const outDir = process.argv[2] || __dirname;
// Collected links are appended here (one URL per line, see processDoc).
const tocFile = fs.openSync(path.join(outDir, 'WordSpy.toc.txt'), 'a');
// Progress log of index pages already processed; opened 'a+' because it is
// both appended to (processDoc) and read back on startup to resume.
const logFile = fs.openSync(path.join(outDir, 'WordSpy.toc.log'), 'a+');
// Append-only error log written by checkDoc.
const errFile = fs.openSync(path.join(outDir, 'WordSpy.toc.errors.log'), 'a');
// Queue of index pages to fetch: one "alpha" page per character of
// '1abcdefghijklmnopqrstuvwxyz', followed by the all-by-category page.
// Entries are removed with shift()/splice() as they are consumed.
const tocURLs = Array.from('1abcdefghijklmnopqrstuvwxyz',
el => `http://www.wordspy.com/index.php?alpha=${el}`)
.concat('http://www.wordspy.com/index.php?tag=all-by-category');
// CSS selectors whose presence in the document means the page content is ready.
const selectorsToCheck = ['.index-words a[href]'];
// Polling interval, in milliseconds, used by checkDoc while waiting for content.
const checkFrequency = 100;
// URL requested before the current one / URL currently being fetched
// (both maintained by getDoc; equal values indicate a retry of the same URL).
let prevURL = '';
let currURL = '';
// Set by the SIGINT handler; getDoc exits the process when it is true.
let terminate = false;
/******************************************************************************/
// On exit: release the three output descriptors (same order as they were
// opened), then play the audible notification.
process.on('exit', () => {
  for (const fd of [tocFile, logFile, errFile]) {
    fs.closeSync(fd);
  }
  playAlert();
});
// Ctrl+C does not stop immediately; it only raises a flag that getDoc checks
// before starting the next request.
process.on('SIGINT', () => { terminate = true; });
/******************************************************************************/
// Entry point. With an empty progress log the crawl starts from the first
// index URL; otherwise the log is read to find the last processed URL and the
// crawl resumes with the one after it.
if (fs.fstatSync(logFile).size === 0) {
  console.log(process.title = '#');
  getDoc(tocURLs.shift());
} else {
  const rl = readline.createInterface({
    input: fs.createReadStream(null, {encoding: 'utf8', fd: logFile, autoClose: false}),
    terminal: false,
    historySize: 0
  });
  let lastLine;
  console.log('Reading the log file...');
  rl.on('line', line => {
    // Remember the last non-blank line: it is the last URL fully processed.
    const trimmed = line.trim();
    if (trimmed) lastLine = trimmed;
  }).on('close', () => {
    // Drop everything up to and including the last processed URL. If the
    // line is not a known URL, indexOf() yields -1 and nothing is dropped.
    tocURLs.splice(0, tocURLs.indexOf(lastLine) + 1);
    if (tocURLs.length === 0) {
      // The log already ends with the final index page; without this guard
      // urlParser.parse(undefined) below would throw.
      console.log('All index pages are already logged. Nothing to do.');
      process.exit();
    }
    const nextURL = tocURLs.shift();
    const query = urlParser.parse(nextURL, true).query;
    console.log(process.title = query.alpha || query.tag);
    getDoc(nextURL);
  });
}
/******************************************************************************/
/**
 * Plays an audible notification by running ffplay synchronously on a
 * Windows system sound.
 *
 * The alert is best-effort: if the player cannot be started or exits with an
 * error, the failure is reported on stderr and swallowed, so a missing
 * ffplay binary cannot abort the exit handler or the retry logic that call
 * this function.
 */
function playAlert() {
  try {
    execSync(
      'f:\\BAK\\prg\\mm\\ffmpeg\\bin\\ffplay.exe',
      ['-v', 'quiet', '-nodisp', '-autoexit', '-af', 'volume=1.0',
        'c:\\WINDOWS\\Media\\Windows Ringin.wav']
    );
  } catch (err) {
    console.error(` Alert sound failed: ${err.message}`);
  }
}
/******************************************************************************/
/**
 * Requests one index page through jsdom and hands the result to checkDoc.
 *
 * Exits the process first if termination was requested. Records the URL in
 * currURL (moving the previous value to prevURL) so that checkDoc can tell a
 * first attempt from a retry.
 *
 * @param {string} url - Address of the index page to load.
 */
function getDoc(url) {
  if (terminate) {
    console.log('Exit on demand.');
    process.exit();
  }

  [prevURL, currURL] = [currURL, url];
  console.log(` ${url}`);

  // External scripts are both fetched and executed.
  const scriptFeatures = {
    FetchExternalResources: ["script"],
    ProcessExternalResources: ["script"]
  };
  // Optional debugging proxy: add `proxy: 'http://127.0.0.1:8888'` to the options.
  jsdom.env({ url, done: checkDoc, features: scriptFeatures });
}
/******************************************************************************/
/**
 * jsdom completion callback for the page requested by getDoc.
 *
 * On a jsdom error the error is logged (once per URL) and the same URL is
 * requested again. Otherwise the document is polled every `checkFrequency`
 * ms until one of `selectorsToCheck` matches, then passed to processDoc.
 * If nothing matches after more than 50 polls, the page is retried when the
 * final location equals the requested URL; otherwise the problem is logged
 * and the process exits.
 *
 * The jsdom window is closed once the page has been processed or abandoned,
 * so DOMs of finished pages and failed attempts do not accumulate.
 *
 * @param {*} err - Error reported by jsdom, or a falsy value on success.
 * @param {Object} window - jsdom window of the loaded page (used on success).
 */
function checkDoc(err, window) {
  if (err) {
    playAlert();
    // prevURL equals currURL when this is already a retry of the same URL;
    // the error file gets only the first failure for each URL.
    if (currURL !== prevURL) {
      fs.writeSync(errFile,
        `jsdom error (${new Date()}).\n${currURL}\n${err}\n`,
        null, 'utf8');
    }
    console.error(` ${err}`);
    console.error(process.title = 'jsdom error. Retrying...');
    getDoc(currURL);
    return;
  }

  const doc = window.document;
  const loc = window.location.href;
  const selector = selectorsToCheck.join(', ');
  const maxPolls = 50;
  let iter = 0;

  const checker = setInterval(() => {
    iter++;
    const found = doc.querySelectorAll(selector).length > 0;
    if (!found && iter <= maxPolls) return; // keep waiting

    clearInterval(checker);
    console.log(` ${iter * checkFrequency} ms`);

    if (found) {
      try {
        processDoc(doc, loc);
      } finally {
        // processDoc reads the document synchronously, so it is safe to
        // release the window as soon as it returns.
        window.close();
      }
      return;
    }

    // Content never appeared: this window is of no further use.
    window.close();

    if (loc === currURL) {
      playAlert();
      console.error(process.title = 'HTTP error. Retrying...');
      getDoc(currURL);
    } else {
      // The page ended up at a different location than requested.
      fs.writeSync(errFile,
        `Something wrong (${new Date()}).\n${currURL}\n`,
        null, 'utf8');
      console.error('Something wrong...');
      process.exit();
    }
  }, checkFrequency);
}
/******************************************************************************/
/**
 * Saves the links found on a loaded index page and moves on to the next one.
 *
 * Appends every matching link's href to the TOC file (preceded by the page
 * URL itself for the all-by-category page), records the page URL in the
 * progress log, then requests the next queued URL or exits when the queue
 * is empty.
 *
 * @param {Object} doc - Document of the loaded index page.
 * @param {string} loc - Final location (href) of that page.
 */
function processDoc(doc, loc) {
  const anchors = doc.querySelectorAll('.index-words a[href]');

  const isCategoryIndex =
    loc === 'http://www.wordspy.com/index.php?tag=all-by-category';
  if (isCategoryIndex) {
    fs.writeSync(tocFile, `${loc}\n`, null, 'utf8');
  }

  const hrefs = Array.from(anchors, el => el.href);
  fs.writeSync(tocFile, `${hrefs.join('\n')}\n`, null, 'utf8');
  fs.writeSync(logFile, `${loc}\n`, null, 'utf8');

  const summary = ` Links: ${anchors.length}.`;
  process.title = summary;
  console.log(summary);

  if (tocURLs.length === 0) {
    process.exit();
  }

  const nextURL = tocURLs.shift();
  const query = urlParser.parse(nextURL, true).query;
  const label = query.alpha || query.tag;
  process.title = label;
  console.log(label);
  getDoc(nextURL);
}
/******************************************************************************/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment